<?xml version="1.0" ?>
<root date="2022-12-05">
  <extension name="3DNOW">
    <instruction asm="FEMMS" category="MMX" cpl="3" extension="3DNOW" iclass="FEMMS" iform="FEMMS" isa-set="3DNOW" string="FEMMS" url="uops.info/html-instr/FEMMS.html"/>
    <instruction asm="PAVGUSB" category="3DNOW" cpl="3" extension="3DNOW" iclass="PAVGUSB" iform="PAVGUSB_MMXq_MEMq" isa-set="3DNOW" string="PAVGUSB (MM, M64)" url="uops.info/html-instr/PAVGUSB_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PAVGUSB" category="3DNOW" cpl="3" extension="3DNOW" iclass="PAVGUSB" iform="PAVGUSB_MMXq_MMXq" isa-set="3DNOW" string="PAVGUSB (MM, MM)" url="uops.info/html-instr/PAVGUSB_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PF2ID" category="3DNOW" cpl="3" extension="3DNOW" iclass="PF2ID" iform="PF2ID_MMXq_MEMq" isa-set="3DNOW" string="PF2ID (MM, M64)" url="uops.info/html-instr/PF2ID_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PF2ID" category="3DNOW" cpl="3" extension="3DNOW" iclass="PF2ID" iform="PF2ID_MMXq_MMXq" isa-set="3DNOW" string="PF2ID (MM, MM)" url="uops.info/html-instr/PF2ID_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PF2IW" category="3DNOW" cpl="3" extension="3DNOW" iclass="PF2IW" iform="PF2IW_MMXq_MEMq" isa-set="3DNOW" string="PF2IW (MM, M64)" url="uops.info/html-instr/PF2IW_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PF2IW" category="3DNOW" cpl="3" extension="3DNOW" iclass="PF2IW" iform="PF2IW_MMXq_MMXq" isa-set="3DNOW" string="PF2IW (MM, MM)" url="uops.info/html-instr/PF2IW_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFACC" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFACC" iform="PFACC_MMXq_MEMq" isa-set="3DNOW" string="PFACC (MM, M64)" url="uops.info/html-instr/PFACC_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFACC" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFACC" iform="PFACC_MMXq_MMXq" isa-set="3DNOW" string="PFACC (MM, MM)" url="uops.info/html-instr/PFACC_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFADD" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFADD" iform="PFADD_MMXq_MEMq" isa-set="3DNOW" string="PFADD (MM, M64)" url="uops.info/html-instr/PFADD_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFADD" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFADD" iform="PFADD_MMXq_MMXq" isa-set="3DNOW" string="PFADD (MM, MM)" url="uops.info/html-instr/PFADD_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFCMPEQ" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFCMPEQ" iform="PFCMPEQ_MMXq_MEMq" isa-set="3DNOW" string="PFCMPEQ (MM, M64)" url="uops.info/html-instr/PFCMPEQ_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFCMPEQ" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFCMPEQ" iform="PFCMPEQ_MMXq_MMXq" isa-set="3DNOW" string="PFCMPEQ (MM, MM)" url="uops.info/html-instr/PFCMPEQ_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFCMPGE" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFCMPGE" iform="PFCMPGE_MMXq_MEMq" isa-set="3DNOW" string="PFCMPGE (MM, M64)" url="uops.info/html-instr/PFCMPGE_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFCMPGE" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFCMPGE" iform="PFCMPGE_MMXq_MMXq" isa-set="3DNOW" string="PFCMPGE (MM, MM)" url="uops.info/html-instr/PFCMPGE_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFCMPGT" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFCMPGT" iform="PFCMPGT_MMXq_MEMq" isa-set="3DNOW" string="PFCMPGT (MM, M64)" url="uops.info/html-instr/PFCMPGT_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFCMPGT" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFCMPGT" iform="PFCMPGT_MMXq_MMXq" isa-set="3DNOW" string="PFCMPGT (MM, MM)" url="uops.info/html-instr/PFCMPGT_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFMAX" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFMAX" iform="PFMAX_MMXq_MEMq" isa-set="3DNOW" string="PFMAX (MM, M64)" url="uops.info/html-instr/PFMAX_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFMAX" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFMAX" iform="PFMAX_MMXq_MMXq" isa-set="3DNOW" string="PFMAX (MM, MM)" url="uops.info/html-instr/PFMAX_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFMIN" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFMIN" iform="PFMIN_MMXq_MEMq" isa-set="3DNOW" string="PFMIN (MM, M64)" url="uops.info/html-instr/PFMIN_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFMIN" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFMIN" iform="PFMIN_MMXq_MMXq" isa-set="3DNOW" string="PFMIN (MM, MM)" url="uops.info/html-instr/PFMIN_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFMUL" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFMUL" iform="PFMUL_MMXq_MEMq" isa-set="3DNOW" string="PFMUL (MM, M64)" url="uops.info/html-instr/PFMUL_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFMUL" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFMUL" iform="PFMUL_MMXq_MMXq" isa-set="3DNOW" string="PFMUL (MM, MM)" url="uops.info/html-instr/PFMUL_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFNACC" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFNACC" iform="PFNACC_MMXq_MEMq" isa-set="3DNOW" string="PFNACC (MM, M64)" url="uops.info/html-instr/PFNACC_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFNACC" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFNACC" iform="PFNACC_MMXq_MMXq" isa-set="3DNOW" string="PFNACC (MM, MM)" url="uops.info/html-instr/PFNACC_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFPNACC" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFPNACC" iform="PFPNACC_MMXq_MEMq" isa-set="3DNOW" string="PFPNACC (MM, M64)" url="uops.info/html-instr/PFPNACC_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFPNACC" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFPNACC" iform="PFPNACC_MMXq_MMXq" isa-set="3DNOW" string="PFPNACC (MM, MM)" url="uops.info/html-instr/PFPNACC_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFRCP" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRCP" iform="PFRCP_MMXq_MEMq" isa-set="3DNOW" string="PFRCP (MM, M64)" url="uops.info/html-instr/PFRCP_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFRCP" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRCP" iform="PFRCP_MMXq_MMXq" isa-set="3DNOW" string="PFRCP (MM, MM)" url="uops.info/html-instr/PFRCP_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFRCPIT1" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRCPIT1" iform="PFRCPIT1_MMXq_MEMq" isa-set="3DNOW" string="PFRCPIT1 (MM, M64)" url="uops.info/html-instr/PFRCPIT1_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFRCPIT1" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRCPIT1" iform="PFRCPIT1_MMXq_MMXq" isa-set="3DNOW" string="PFRCPIT1 (MM, MM)" url="uops.info/html-instr/PFRCPIT1_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFRCPIT2" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRCPIT2" iform="PFRCPIT2_MMXq_MEMq" isa-set="3DNOW" string="PFRCPIT2 (MM, M64)" url="uops.info/html-instr/PFRCPIT2_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFRCPIT2" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRCPIT2" iform="PFRCPIT2_MMXq_MMXq" isa-set="3DNOW" string="PFRCPIT2 (MM, MM)" url="uops.info/html-instr/PFRCPIT2_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFRSQIT1" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRSQIT1" iform="PFRSQIT1_MMXq_MEMq" isa-set="3DNOW" string="PFRSQIT1 (MM, M64)" url="uops.info/html-instr/PFRSQIT1_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFRSQIT1" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRSQIT1" iform="PFRSQIT1_MMXq_MMXq" isa-set="3DNOW" string="PFRSQIT1 (MM, MM)" url="uops.info/html-instr/PFRSQIT1_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFRSQRT" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRSQRT" iform="PFRSQRT_MMXq_MEMq" isa-set="3DNOW" string="PFRSQRT (MM, M64)" url="uops.info/html-instr/PFRSQRT_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFRSQRT" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRSQRT" iform="PFRSQRT_MMXq_MMXq" isa-set="3DNOW" string="PFRSQRT (MM, MM)" url="uops.info/html-instr/PFRSQRT_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFSUB" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFSUB" iform="PFSUB_MMXq_MEMq" isa-set="3DNOW" string="PFSUB (MM, M64)" url="uops.info/html-instr/PFSUB_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFSUB" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFSUB" iform="PFSUB_MMXq_MMXq" isa-set="3DNOW" string="PFSUB (MM, MM)" url="uops.info/html-instr/PFSUB_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFSUBR" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFSUBR" iform="PFSUBR_MMXq_MEMq" isa-set="3DNOW" string="PFSUBR (MM, M64)" url="uops.info/html-instr/PFSUBR_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFSUBR" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFSUBR" iform="PFSUBR_MMXq_MMXq" isa-set="3DNOW" string="PFSUBR (MM, MM)" url="uops.info/html-instr/PFSUBR_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PI2FD" category="3DNOW" cpl="3" extension="3DNOW" iclass="PI2FD" iform="PI2FD_MMXq_MEMq" isa-set="3DNOW" string="PI2FD (MM, M64)" url="uops.info/html-instr/PI2FD_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PI2FD" category="3DNOW" cpl="3" extension="3DNOW" iclass="PI2FD" iform="PI2FD_MMXq_MMXq" isa-set="3DNOW" string="PI2FD (MM, MM)" url="uops.info/html-instr/PI2FD_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PI2FW" category="3DNOW" cpl="3" extension="3DNOW" iclass="PI2FW" iform="PI2FW_MMXq_MEMq" isa-set="3DNOW" string="PI2FW (MM, M64)" url="uops.info/html-instr/PI2FW_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PI2FW" category="3DNOW" cpl="3" extension="3DNOW" iclass="PI2FW" iform="PI2FW_MMXq_MMXq" isa-set="3DNOW" string="PI2FW (MM, MM)" url="uops.info/html-instr/PI2FW_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PMULHRW" category="3DNOW" cpl="3" extension="3DNOW" iclass="PMULHRW" iform="PMULHRW_MMXq_MEMq" isa-set="3DNOW" string="PMULHRW (MM, M64)" url="uops.info/html-instr/PMULHRW_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PMULHRW" category="3DNOW" cpl="3" extension="3DNOW" iclass="PMULHRW" iform="PMULHRW_MMXq_MMXq" isa-set="3DNOW" string="PMULHRW (MM, MM)" url="uops.info/html-instr/PMULHRW_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PSWAPD" category="3DNOW" cpl="3" extension="3DNOW" iclass="PSWAPD" iform="PSWAPD_MMXq_MEMq" isa-set="3DNOW" string="PSWAPD (MM, M64)" url="uops.info/html-instr/PSWAPD_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PSWAPD" category="3DNOW" cpl="3" extension="3DNOW" iclass="PSWAPD" iform="PSWAPD_MMXq_MMXq" isa-set="3DNOW" string="PSWAPD (MM, MM)" url="uops.info/html-instr/PSWAPD_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
  </extension>
  <extension name="3DNOW_PREFETCH">
    <instruction asm="PREFETCHW" category="PREFETCH" cpl="3" extension="3DNOW_PREFETCH" iclass="PREFETCHW" iform="PREFETCHW_0F0Dr1" isa-set="PREFETCH_NOP" string="PREFETCHW (M512)" url="uops.info/html-instr/PREFETCHW_M512.html" summary="Prefetch Data into Caches in Anticipation of a Write" url-ref="felixcloutier.com/x86/PREFETCHW.html">
      <operand idx="1" memory-prefix="zmmword ptr" name="MEM0" r="1" type="mem" width="512" xtype="i64"/>
      <architecture name="CON">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="1" ports="1*p015" TP_ports="0.33"/>
      </architecture>
      <architecture name="WOL">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="1" ports="1*p015" TP_ports="0.33"/>
      </architecture>
      <architecture name="NHM">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="WSM">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="SNB">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="0" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="0" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="HSW">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="0" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="BDW">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="SKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="SKX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33"/>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1"/>
      </architecture>
      <architecture name="BNL">
        <measurement uops_MS="2" complex_decoder="1" TP_unrolled="5.00" TP_loop="5.00" uops="2"/>
      </architecture>
      <architecture name="AMT">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1"/>
      </architecture>
      <architecture name="GLM">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1"/>
      </architecture>
      <architecture name="GLP">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1"/>
      </architecture>
      <architecture name="TRM">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1"/>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="PREFETCH" category="PREFETCH" cpl="3" extension="3DNOW_PREFETCH" iclass="PREFETCH_EXCLUSIVE" iform="PREFETCH_EXCLUSIVE_MEMmprefetch" isa-set="PREFETCH_NOP" string="PREFETCH_EXCLUSIVE (M512)" url="uops.info/html-instr/PREFETCH_EXCLUSIVE_M512.html">
      <operand idx="1" memory-prefix="zmmword ptr" name="MEM0" r="1" type="mem" width="512" xtype="i64"/>
      <architecture name="CON">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="1" ports="1*p015" TP_ports="0.33"/>
      </architecture>
      <architecture name="WOL">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="1" ports="1*p015" TP_ports="0.33"/>
      </architecture>
      <architecture name="NHM">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="4" TP_no_interiteration="1.00" uops="1" ports="1*p2" TP_ports="1.00" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p2" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p2" TP_ports="1.00" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p2" TP_ports_indexed="1.00"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="WSM">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="4" TP_no_interiteration="1.00" uops="1" ports="1*p2" TP_ports="1.00" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p2" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p2" TP_ports="1.00" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p2" TP_ports_indexed="1.00"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="5" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="0" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="5" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="0" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="5" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="0" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="BDW">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="SKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="SKX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33"/>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="2" complex_decoder="1" TP_unrolled="9.00" TP_loop="8.92" uops="3"/>
      </architecture>
      <architecture name="BNL">
        <measurement uops_MS="2" complex_decoder="1" TP_unrolled="5.00" TP_loop="5.00" uops="2"/>
      </architecture>
      <architecture name="AMT">
        <measurement uops_MS="2" complex_decoder="1" TP_unrolled="4.00" TP_loop="1.25" uops="2"/>
      </architecture>
      <architecture name="GLM">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="4.00" TP_loop="1.20" uops="3"/>
      </architecture>
      <architecture name="GLP">
        <measurement uops_MS="2" complex_decoder="1" TP_unrolled="6.00" TP_loop="6.00" uops="2"/>
      </architecture>
      <architecture name="TRM">
        <measurement uops_MS="2" complex_decoder="1" TP_unrolled="7.00" TP_loop="7.00" uops="3"/>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1"/>
      </architecture>
    </instruction>
  </extension>
  <extension name="ADOX_ADCX">
    <instruction asm="ADCX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADCX" iform="ADCX_GPR32d_GPR32d" isa-set="ADOX_ADCX" string="ADCX (R32, R32)" url="uops.info/html-instr/ADCX_R32_R32.html" summary="Unsigned Integer Addition of Two Operands with Carry Flag" url-ref="felixcloutier.com/x86/ADCX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand flag_CF="r/w" idx="3" name="REG2" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.52" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.75" TP_loop="0.75" uops="1">
          <latency start_op="1" target_op="1" cycles="2"/>
          <latency start_op="1" target_op="3" cycles="2"/>
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="2" target_op="3" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.72" TP_unrolled="0.58" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc uops="1" ports="ALU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.56" TP_loop="0.58" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/ALU3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.56" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/1/2" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.56" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="ADCX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADCX" iform="ADCX_GPR32d_MEMd" isa-set="ADOX_ADCX" string="ADCX (R32, M32)" url="uops.info/html-instr/ADCX_R32_M32.html" summary="Unsigned Integer Addition of Two Operands with Carry Flag" url-ref="felixcloutier.com/x86/ADCX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="i32"/>
      <operand flag_CF="r/w" idx="3" name="REG1" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.60" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.56" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.57" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.60" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.56" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.67" TP_loop_indexed="0.67" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.67" TP_loop_indexed="0.67" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.61" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.67" TP_loop_indexed="0.67" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.53" uops="2" ports="1*p06+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p06+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.75" TP_loop="0.75" uops="1">
          <latency start_op="1" target_op="1" cycles="2"/>
          <latency start_op="1" target_op="3" cycles="2"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="2" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="2" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.75" TP_unrolled="0.60" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc uops="1" ports="ALU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.60" TP_loop="0.63" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="5" cycles_addr_index="5" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="5" cycles_addr_index="5" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/ALU3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.60" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/1/2" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.60" TP_loop="0.50" uops="1" TP_unrolled_indexed="0.67" TP_loop_indexed="0.50" uops_indexed="2">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="ADCX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADCX" iform="ADCX_GPR64q_GPR64q" isa-set="ADOX_ADCX" string="ADCX (R64, R64)" url="uops.info/html-instr/ADCX_R64_R64.html" summary="Unsigned Integer Addition of Two Operands with Carry Flag" url-ref="felixcloutier.com/x86/ADCX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand flag_CF="r/w" idx="3" name="REG2" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.52" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.75" TP_loop="0.75" uops="1">
          <latency start_op="1" target_op="1" cycles="2"/>
          <latency start_op="1" target_op="3" cycles="2"/>
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="2" target_op="3" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.72" TP_unrolled="0.58" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc uops="1" ports="ALU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.56" TP_loop="0.58" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/ALU3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.56" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/1/2" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.56" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="ADCX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADCX" iform="ADCX_GPR64q_MEMq" isa-set="ADOX_ADCX" string="ADCX (R64, M64)" url="uops.info/html-instr/ADCX_R64_M64.html" summary="Unsigned Integer Addition of Two Operands with Carry Flag" url-ref="felixcloutier.com/x86/ADCX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
      <operand flag_CF="r/w" idx="3" name="REG1" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.60" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.56" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.57" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.60" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.67" TP_loop_indexed="0.67" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.56" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.67" TP_loop_indexed="0.67" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.67" TP_loop_indexed="0.67" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.53" uops="2" ports="1*p06+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.52" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p06+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.75" TP_loop="0.75" uops="1">
          <latency start_op="1" target_op="1" cycles="2"/>
          <latency start_op="1" target_op="3" cycles="2"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="2" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="2" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.75" TP_unrolled="0.60" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc uops="1" ports="ALU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.60" TP_loop="0.63" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="5" cycles_addr_index="5" cycles_mem="1" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="5" cycles_addr_index="5" cycles_mem="1" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/ALU3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.60" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/1/2" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.60" TP_loop="0.50" uops="1" TP_unrolled_indexed="0.67" TP_loop_indexed="0.50" uops_indexed="2">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="ADOX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADOX" iform="ADOX_GPR32d_GPR32d" isa-set="ADOX_ADCX" string="ADOX (R32, R32)" url="uops.info/html-instr/ADOX_R32_R32.html" summary="Unsigned Integer Addition of Two Operands with Overflow Flag" url-ref="felixcloutier.com/x86/ADOX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand flag_OF="r/w" idx="3" name="REG2" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.52" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.75" TP_loop="0.75" uops="1">
          <latency start_op="1" target_op="1" cycles="2"/>
          <latency start_op="1" target_op="3" cycles="2"/>
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="2" target_op="3" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.58" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc uops="1" ports="ALU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.56" TP_loop="0.58" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/ALU3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.56" TP_loop="0.33" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/1/2" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.54" TP_loop="0.33" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="ADOX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADOX" iform="ADOX_GPR32d_MEMd" isa-set="ADOX_ADCX" string="ADOX (R32, M32)" url="uops.info/html-instr/ADOX_R32_M32.html" summary="Unsigned Integer Addition of Two Operands with Overflow Flag" url-ref="felixcloutier.com/x86/ADOX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="i32"/>
      <operand flag_OF="r/w" idx="3" name="REG1" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.60" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="0.60" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.56" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="0.63" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.56" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="0.63" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.63" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.60" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.56" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="0.63" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="0.60" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.56" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="0.63" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.67" TP_loop_indexed="0.60" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.67" TP_loop_indexed="0.60" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.67" TP_loop_indexed="0.60" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.53" uops="2" ports="1*p06+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p06+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.75" TP_loop="0.75" uops="1">
          <latency start_op="1" target_op="1" cycles="2"/>
          <latency start_op="1" target_op="3" cycles="2"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="2" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="2" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.60" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc uops="1" ports="ALU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.60" TP_loop="0.63" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="5" cycles_addr_index="5" cycles_mem="1" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="5" cycles_addr_index="5" cycles_mem="1" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/ALU3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.60" TP_loop="0.38" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/1/2" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.60" TP_loop="0.37" uops="1" TP_unrolled_indexed="0.65" TP_loop_indexed="0.38" uops_indexed="2">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="ADOX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADOX" iform="ADOX_GPR64q_GPR64q" isa-set="ADOX_ADCX" string="ADOX (R64, R64)" url="uops.info/html-instr/ADOX_R64_R64.html" summary="Unsigned Integer Addition of Two Operands with Overflow Flag" url-ref="felixcloutier.com/x86/ADOX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand flag_OF="r/w" idx="3" name="REG2" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" uops="1" ports="1*p06" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.53" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.54" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.52" uops="1" ports="1*p06" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.75" TP_loop="0.75" uops="1">
          <latency start_op="1" target_op="1" cycles="2"/>
          <latency start_op="1" target_op="3" cycles="2"/>
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="2" target_op="3" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.58" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc uops="1" ports="ALU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.56" TP_loop="0.58" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/ALU3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.56" TP_loop="0.33" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/1/2" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.56" TP_loop="0.33" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="2" target_op="3" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="ADOX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADOX" iform="ADOX_GPR64q_MEMq" isa-set="ADOX_ADCX" string="ADOX (R64, M64)" url="uops.info/html-instr/ADOX_R64_M64.html" summary="Unsigned Integer Addition of Two Operands with Overflow Flag" url-ref="felixcloutier.com/x86/ADOX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
      <operand flag_OF="r/w" idx="3" name="REG1" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.60" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="0.60" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="0.63" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.75" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="2" ports="1*p06+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.56" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="0.63" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.63" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.60" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="0.63" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="0.60" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="0.63" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.67" TP_loop_indexed="0.60" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.60" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.67" TP_loop_indexed="0.60" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.57" uops="2" ports="1*p06+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.67" TP_loop_indexed="0.60" uops_indexed="2" ports_indexed="1*p06+1*p23" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.53" uops="2" ports="1*p06+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p06+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.75" TP_loop="0.75" uops="1">
          <latency start_op="1" target_op="1" cycles="2"/>
          <latency start_op="1" target_op="3" cycles="2"/>
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_index="6" cycles_mem="2" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="6" cycles_addr_index="6" cycles_mem="2" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.60" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc uops="1" ports="ALU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.60" TP_loop="0.63" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="5" cycles_addr_index="5" cycles_mem="1" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="5" cycles_addr_index="5" cycles_mem="1" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/ALU3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.60" TP_loop="0.38" uops="1">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
        <doc uops="1" ports="ALU0/1/2" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.57" TP_loop="0.37" uops="1" TP_unrolled_indexed="0.67" TP_loop_indexed="0.38" uops_indexed="2">
          <latency start_op="1" target_op="1" cycles="1"/>
          <latency start_op="1" target_op="3" cycles="1"/>
          <latency start_op="2" target_op="1" cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="3" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
  </extension>
  <extension name="AES">
    <instruction asm="AESDEC" category="AES" cpl="3" extension="AES" iclass="AESDEC" iform="AESDEC_XMMdq_XMMdq" isa-set="AES" string="AESDEC (XMM, XMM)" url="uops.info/html-instr/AESDEC_XMM_XMM.html" summary="Perform One Round of an AES Decryption Flow" url-ref="felixcloutier.com/x86/AESDEC.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="WSM">
        <IACA version="2.1" TP="2.00" latency="6" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p05" uops="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.14" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.13" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="7"/>
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="AMT">
        <measurement uops_MS="4" complex_decoder="1" TP_unrolled="5.00" TP_loop="5.00" uops="4">
          <latency start_op="1" target_op="1" cycles="9"/>
          <latency start_op="2" target_op="1" cycles="8"/>
        </measurement>
      </architecture>
      <architecture name="GLM">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="1">
          <latency start_op="1" target_op="1" cycles="6"/>
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="GLP">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="TRM">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESDEC" category="AES" cpl="3" extension="AES" iclass="AESDEC" iform="AESDEC_XMMdq_MEMdq" isa-set="AES" string="AESDEC (XMM, M128)" url="uops.info/html-instr/AESDEC_XMM_M128.html" summary="Perform One Round of an AES Decryption Flow" url-ref="felixcloutier.com/x86/AESDEC.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="WSM">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p2+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="2*p0+1*p2+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p2+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="2*p0+1*p2+1*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p2+1*p5" uops="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="13" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.25" TP_ports="1.00" TP_unrolled="1.04" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="13" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.31" TP_ports="1.00" TP_unrolled="1.05" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="13" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="7"/>
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="AMT">
        <measurement uops_MS="5" complex_decoder="1" TP_unrolled="9.00" TP_loop="9.00" uops="5">
          <latency start_op="1" target_op="1" cycles="9"/>
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="GLM">
        <measurement uops_MS="2" complex_decoder="1" TP_unrolled="4.00" TP_loop="2.06" uops="2">
          <latency start_op="1" target_op="1" cycles="6"/>
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="GLP">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TRM">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESDECLAST" category="AES" cpl="3" extension="AES" iclass="AESDECLAST" iform="AESDECLAST_XMMdq_XMMdq" isa-set="AES" string="AESDECLAST (XMM, XMM)" url="uops.info/html-instr/AESDECLAST_XMM_XMM.html" summary="Perform Last Round of an AES Decryption Flow" url-ref="felixcloutier.com/x86/AESDECLAST.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="WSM">
        <IACA version="2.1" TP="2.00" latency="6" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p05" uops="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.15" TP_ports="1.00" TP_unrolled="1.10" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.13" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="7"/>
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="AMT">
        <measurement uops_MS="4" complex_decoder="1" TP_unrolled="5.00" TP_loop="5.00" uops="4">
          <latency start_op="1" target_op="1" cycles="9"/>
          <latency start_op="2" target_op="1" cycles="8"/>
        </measurement>
      </architecture>
      <architecture name="GLM">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="1">
          <latency start_op="1" target_op="1" cycles="6"/>
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="GLP">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="TRM">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESDECLAST" category="AES" cpl="3" extension="AES" iclass="AESDECLAST" iform="AESDECLAST_XMMdq_MEMdq" isa-set="AES" string="AESDECLAST (XMM, M128)" url="uops.info/html-instr/AESDECLAST_XMM_M128.html" summary="Perform Last Round of an AES Decryption Flow" url-ref="felixcloutier.com/x86/AESDECLAST.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="WSM">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p2+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="2*p0+1*p2+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p2+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="2*p0+1*p2+1*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p2+1*p5" uops="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="13" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.28" TP_ports="1.00" TP_unrolled="1.02" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="13" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.31" TP_ports="1.00" TP_unrolled="1.05" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="13" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="7"/>
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="AMT">
        <measurement uops_MS="5" complex_decoder="1" TP_unrolled="9.00" TP_loop="9.00" uops="5">
          <latency start_op="1" target_op="1" cycles="9"/>
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="GLM">
        <measurement uops_MS="2" complex_decoder="1" TP_unrolled="4.00" TP_loop="2.06" uops="2">
          <latency start_op="1" target_op="1" cycles="6"/>
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="GLP">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TRM">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESENC" category="AES" cpl="3" extension="AES" iclass="AESENC" iform="AESENC_XMMdq_XMMdq" isa-set="AES" string="AESENC (XMM, XMM)" url="uops.info/html-instr/AESENC_XMM_XMM.html" summary="Perform One Round of an AES Encryption Flow" url-ref="felixcloutier.com/x86/AESENC.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="WSM">
        <IACA version="2.1" TP="2.00" latency="6" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p05" uops="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.14" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.13" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="7"/>
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="AMT">
        <measurement uops_MS="4" complex_decoder="1" TP_unrolled="5.00" TP_loop="5.00" uops="4">
          <latency start_op="1" target_op="1" cycles="9"/>
          <latency start_op="2" target_op="1" cycles="8"/>
        </measurement>
      </architecture>
      <architecture name="GLM">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="1">
          <latency start_op="1" target_op="1" cycles="6"/>
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="GLP">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="TRM">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESENC" category="AES" cpl="3" extension="AES" iclass="AESENC" iform="AESENC_XMMdq_MEMdq" isa-set="AES" string="AESENC (XMM, M128)" url="uops.info/html-instr/AESENC_XMM_M128.html" summary="Perform One Round of an AES Encryption Flow" url-ref="felixcloutier.com/x86/AESENC.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="WSM">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p2+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="2*p0+1*p2+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p2+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="2*p0+1*p2+1*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p2+1*p5" uops="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="13" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.28" TP_ports="1.00" TP_unrolled="1.02" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="13" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.28" TP_ports="1.00" TP_unrolled="1.04" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="13" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="7"/>
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="AMT">
        <measurement uops_MS="5" complex_decoder="1" TP_unrolled="9.00" TP_loop="9.00" uops="5">
          <latency start_op="1" target_op="1" cycles="9"/>
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="GLM">
        <measurement uops_MS="2" complex_decoder="1" TP_unrolled="4.00" TP_loop="2.06" uops="2">
          <latency start_op="1" target_op="1" cycles="6"/>
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="GLP">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TRM">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESENCLAST" category="AES" cpl="3" extension="AES" iclass="AESENCLAST" iform="AESENCLAST_XMMdq_XMMdq" isa-set="AES" string="AESENCLAST (XMM, XMM)" url="uops.info/html-instr/AESENCLAST_XMM_XMM.html" summary="Perform Last Round of an AES Encryption Flow" url-ref="felixcloutier.com/x86/AESENCLAST.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="WSM">
        <IACA version="2.1" TP="2.00" latency="6" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p05" uops="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.13" TP_ports="1.00" TP_unrolled="1.09" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.14" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="7"/>
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="AMT">
        <measurement uops_MS="4" complex_decoder="1" TP_unrolled="5.00" TP_loop="5.00" uops="4">
          <latency start_op="1" target_op="1" cycles="9"/>
          <latency start_op="2" target_op="1" cycles="8"/>
        </measurement>
      </architecture>
      <architecture name="GLM">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="1">
          <latency start_op="1" target_op="1" cycles="6"/>
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="GLP">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="TRM">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESENCLAST" category="AES" cpl="3" extension="AES" iclass="AESENCLAST" iform="AESENCLAST_XMMdq_MEMdq" isa-set="AES" string="AESENCLAST (XMM, M128)" url="uops.info/html-instr/AESENCLAST_XMM_M128.html" summary="Perform Last Round of an AES Encryption Flow" url-ref="felixcloutier.com/x86/AESENCLAST.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="WSM">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p2+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="2*p0+1*p2+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p2+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="2*p0+1*p2+1*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p2+1*p5" uops="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="13" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.25" TP_ports="1.00" TP_unrolled="1.05" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="13" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.31" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="13" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="7"/>
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="3"/>
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="AMT">
        <measurement uops_MS="5" complex_decoder="1" TP_unrolled="9.00" TP_loop="9.00" uops="5">
          <latency start_op="1" target_op="1" cycles="9"/>
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="GLM">
        <measurement uops_MS="2" complex_decoder="1" TP_unrolled="4.00" TP_loop="2.06" uops="2">
          <latency start_op="1" target_op="1" cycles="6"/>
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="GLP">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TRM">
        <measurement uops_MS="0" TP_unrolled="0.52" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles="4"/>
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESIMC" category="AES" cpl="3" extension="AES" iclass="AESIMC" iform="AESIMC_XMMdq_XMMdq" isa-set="AES" string="AESIMC (XMM, XMM)" url="uops.info/html-instr/AESIMC_XMM_XMM.html" summary="Perform the AES InvMixColumn Transformation" url-ref="felixcloutier.com/x86/AESIMC.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="WSM">
        <IACA version="2.1" TP="2.00" latency="6" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p05" uops="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" latency="12" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.06" TP_ports="2.00" TP_unrolled="2.02" available_simple_decoders="0" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="14" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" latency="12" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.07" TP_ports="2.00" TP_unrolled="2.02" available_simple_decoders="0" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="14" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="14" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.96" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="14" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.96" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="14"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p0" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.96" uops="2" ports="2*p0" TP_ports="2.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p0" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="8"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p0" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.96" uops="2" ports="2*p0" TP_ports="2.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p0" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="8"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p0" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p0" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="8"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.05" TP_loop="1.00" uops="2" ports="2*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p0" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="8"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="AMT">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="5.00" TP_loop="4.00" uops="3">
          <latency start_op="2" target_op="1" cycles="8"/>
        </measurement>
      </architecture>
      <architecture name="GLM">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="1">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="GLP">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="TRM">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESIMC" category="AES" cpl="3" extension="AES" iclass="AESIMC" iform="AESIMC_XMMdq_MEMdq" isa-set="AES" string="AESIMC (XMM, M128)" url="uops.info/html-instr/AESIMC_XMM_M128.html" summary="Perform the AES InvMixColumn Transformation" url-ref="felixcloutier.com/x86/AESIMC.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="WSM">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p2+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="2*p0+1*p2+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p2+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="2*p0+1*p2+1*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p2+1*p5" uops="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="18" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.06" TP_ports="2.00" TP_unrolled="2.02" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="18" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.06" TP_ports="2.00" TP_unrolled="2.02" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="20" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles_addr="20" cycles_addr_is_upper_bound="1" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="2*p0+1*p23" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="2*p0+1*p23" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="2*p0+1*p23" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="2*p0+1*p23" TP_ports_indexed="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="2*p0+1*p23" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="2*p0+1*p23" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="2*p0+1*p23" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="2*p0+1*p23" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="2*p0+1*p23" TP_ports_indexed="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="2*p0+1*p23" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p0+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="2*p0+1*p23" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p01+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="2*p0+1*p23" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p01+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p01+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p01+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p01+1*p23A" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="AMT">
        <measurement uops_MS="4" complex_decoder="1" TP_unrolled="8.00" TP_loop="8.00" uops="4">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="GLM">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="GLP">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TRM">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESKEYGENASSIST" category="AES" cpl="3" extension="AES" iclass="AESKEYGENASSIST" iform="AESKEYGENASSIST_XMMdq_XMMdq_IMMb" isa-set="AES" string="AESKEYGENASSIST (XMM, XMM, I8)" url="uops.info/html-instr/AESKEYGENASSIST_XMM_XMM_I8.html" summary="AES Round Key Generation Assist" url-ref="felixcloutier.com/x86/AESKEYGENASSIST.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="WSM">
        <IACA version="2.1" TP="2.20" latency="12" TP_no_interiteration="2.00" uops="5" ports="2*p0+1*p015+1*p2+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="5" ports="2*p0+1*p015+1*p2+1*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p015+1*p5" uops="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <measurement TP_loop="8.00" TP_ports="7.00" TP_unrolled="8.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p015+1*p15+7*p5" uops="11" uops_MITE="3" uops_MS="8" uops_retire_slots="11">
          <latency cycles="10" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="8.00" TP_ports="7.00" TP_unrolled="8.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p015+1*p15+7*p5" uops="11" uops_MITE="3" uops_MS="8" uops_retire_slots="11">
          <latency cycles="10" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.2" TP="7.00" TP_no_interiteration="7.00" uops="11" ports="2*p0+2*p015+7*p5" TP_ports="7.00"/>
        <IACA version="2.3" TP="7.00" uops="11" ports="2*p0+2*p015+7*p5" TP_ports="7.00"/>
        <IACA version="3.0" TP="6.68" uops="11" ports="2*p0+2*p015+7*p5" TP_ports="7.00"/>
        <measurement TP_loop="8.46" TP_ports="8.00" TP_unrolled="8.47" available_simple_decoders="0" complex_decoder="1" ports="2*p0+8*p5" uops="10" uops_MITE="3" uops_MS="7" uops_retire_slots="10">
          <latency cycles="9" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="7.00" TP_no_interiteration="7.00" uops="11" ports="2*p0+2*p015+7*p5" TP_ports="7.00"/>
        <IACA version="2.3" TP="7.00" uops="11" ports="2*p0+2*p015+7*p5" TP_ports="7.00"/>
        <IACA version="3.0" TP="6.67" uops="11" ports="2*p0+2*p015+7*p5" TP_ports="7.00"/>
        <measurement uops_retire_slots="10" uops_MITE="3" uops_MS="7" complex_decoder="1" available_simple_decoders="0" TP_unrolled="8.50" TP_loop="8.54" uops="10" ports="2*p0+8*p5" TP_ports="8.00">
          <latency start_op="2" target_op="1" cycles="9"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="6.00" uops="11" ports="3*p0+2*p015+6*p5" TP_ports="6.00"/>
        <IACA version="3.0" TP="5.71" uops="11" ports="3*p0+2*p015+6*p5" TP_ports="6.00"/>
        <measurement uops_retire_slots="13" uops_MITE="3" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.00" TP_loop="12.00" uops="13" ports="5*p0+1*p06+7*p5" TP_ports="7.00">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="6.00" uops="11" ports="3*p0+2*p015+6*p5" TP_ports="6.00"/>
        <IACA version="3.0" TP="5.71" uops="11" ports="3*p0+2*p015+6*p5" TP_ports="6.00"/>
        <measurement uops_retire_slots="13" uops_MITE="3" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.00" TP_loop="12.00" uops="13" ports="5*p0+1*p06+7*p5" TP_ports="7.00">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="12.00" TP_ports="7.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="5*p0+1*p06+7*p5" uops="13" uops_MITE="3" uops_MS="10" uops_retire_slots="13">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="13" uops_MITE="3" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.00" TP_loop="12.00" uops="13" ports="5*p0+1*p06+7*p5" TP_ports="7.00">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="13" uops_MITE="3" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.00" TP_loop="12.00" uops="13" ports="4*p0+1*p01+1*p015+1*p06+6*p5" TP_ports="6.00">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="13" uops_MITE="3" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.00" TP_loop="12.00" uops="13" ports="5*p0+1*p06+7*p5" TP_ports="7.00">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="14" uops_MITE="3" uops_MS="11" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.00" TP_loop="12.00" uops="13" ports="4*p0+1*p01+1*p015+1*p06+3*p15+3*p5" TP_ports="4.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="14" uops_MITE="3" uops_MS="11" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.00" TP_loop="12.00" uops="13" ports="4*p0+1*p01+1*p015+1*p06+3*p15+3*p5" TP_ports="4.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="14" uops_MITE="3" uops_MS="11" complex_decoder="1" available_simple_decoders="0" TP_unrolled="11.97" TP_loop="12.00" uops="13" ports="4*p0+1*p01+1*p015+1*p06+3*p15+3*p5" TP_ports="4.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="15" uops_MITE="3" uops_MS="12" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.94" TP_loop="13.00" uops="14" ports="4*p0+1*p01+1*p015+2*p06+3*p15+3*p5" TP_ports="4.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="AMT">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="5.00" TP_loop="4.00" uops="3">
          <latency start_op="2" target_op="1" cycles="8"/>
        </measurement>
      </architecture>
      <architecture name="GLM">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="1">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="GLP">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="TRM">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESKEYGENASSIST" category="AES" cpl="3" extension="AES" iclass="AESKEYGENASSIST" iform="AESKEYGENASSIST_XMMdq_MEMdq_IMMb" isa-set="AES" string="AESKEYGENASSIST (XMM, M128, I8)" url="uops.info/html-instr/AESKEYGENASSIST_XMM_M128_I8.html" summary="AES Round Key Generation Assist" url-ref="felixcloutier.com/x86/AESKEYGENASSIST.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="WSM">
        <IACA version="2.1" TP="2.00" latency="6" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p015+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p015+1*p5" TP_ports="2.00"/>
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" complex_decoder="1" ports="2*p0+1*p015+1*p2+1*p5" uops="5" uops_MS="1" uops_retire_slots="5">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <measurement TP_loop="6.95" TP_ports="6.00" TP_unrolled="6.94" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p015+1*p15+1*p23+6*p5" uops="11" uops_MITE="3" uops_MS="8" uops_retire_slots="11">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="6.95" TP_ports="6.00" TP_unrolled="6.92" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p015+1*p15+1*p23+6*p5" uops="11" uops_MITE="3" uops_MS="8" uops_retire_slots="11">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.2" TP="7.00" TP_no_interiteration="7.00" uops="11" ports="2*p0+1*p015+1*p23+7*p5" TP_ports="7.00"/>
        <IACA version="2.3" TP="7.00" uops="11" ports="2*p0+1*p015+1*p23+7*p5" TP_ports="7.00"/>
        <IACA version="3.0" TP="6.90" uops="11" ports="2*p0+1*p015+1*p23+7*p5" TP_ports="7.00"/>
        <measurement TP_loop="7.78" TP_ports="7.00" TP_unrolled="7.84" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p23+7*p5" uops="10" uops_MITE="3" uops_MS="7" uops_retire_slots="10">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="7.00" TP_no_interiteration="7.00" uops="11" ports="2*p0+1*p015+1*p23+7*p5" TP_ports="7.00"/>
        <IACA version="2.3" TP="7.00" uops="11" ports="2*p0+1*p015+1*p23+7*p5" TP_ports="7.00"/>
        <IACA version="3.0" TP="6.89" uops="11" ports="2*p0+1*p015+1*p23+7*p5" TP_ports="7.00"/>
        <measurement uops_retire_slots="10" uops_MITE="3" uops_MS="7" complex_decoder="1" available_simple_decoders="0" TP_unrolled="7.89" TP_loop="7.80" uops="10" ports="2*p0+1*p23+7*p5" TP_ports="7.00">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="6.00" uops="11" ports="3*p0+1*p015+1*p23+6*p5" TP_ports="6.00"/>
        <IACA version="3.0" TP="5.89" uops="11" ports="3*p0+1*p015+1*p23+6*p5" TP_ports="6.00"/>
        <measurement uops_retire_slots="13" uops_MITE="3" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.00" TP_loop="12.00" uops="13" ports="5*p0+1*p06+1*p23+6*p5" TP_ports="6.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="6.00" uops="11" ports="3*p0+1*p015+1*p23+6*p5" TP_ports="6.00"/>
        <IACA version="3.0" TP="5.89" uops="11" ports="3*p0+1*p015+1*p23+6*p5" TP_ports="6.00"/>
        <measurement uops_retire_slots="13" uops_MITE="3" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.00" TP_loop="12.00" uops="13" ports="5*p0+1*p06+1*p23+6*p5" TP_ports="6.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="12.00" TP_ports="6.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="5*p0+1*p06+1*p23+6*p5" uops="13" uops_MITE="3" uops_MS="10" uops_retire_slots="13">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="13" uops_MITE="3" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.00" TP_loop="12.00" uops="13" ports="5*p0+1*p06+1*p23+6*p5" TP_ports="6.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="13" uops_MITE="3" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.00" TP_loop="12.00" uops="13" ports="4*p0+1*p01+1*p06+1*p23+6*p5" TP_ports="6.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="13" uops_MITE="3" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.00" TP_loop="12.00" uops="13" ports="5*p0+1*p06+1*p23+6*p5" TP_ports="6.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="14" uops_MITE="3" uops_MS="11" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.00" TP_loop="12.00" uops="13" ports="4*p0+1*p01+1*p06+3*p15+1*p23+3*p5" TP_ports="4.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="14" uops_MITE="3" uops_MS="11" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.00" TP_loop="12.00" uops="13" ports="4*p0+1*p01+1*p06+3*p15+1*p23+3*p5" TP_ports="4.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="14" uops_MITE="3" uops_MS="11" complex_decoder="1" available_simple_decoders="0" TP_unrolled="11.97" TP_loop="12.00" uops="13" ports="4*p0+1*p01+1*p06+3*p15+1*p23+3*p5" TP_ports="4.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="15" uops_MITE="3" uops_MS="12" complex_decoder="1" available_simple_decoders="0" TP_unrolled="12.91" TP_loop="13.00" uops="14" ports="4*p0+1*p01+2*p06+3*p15+1*p23A+3*p5" TP_ports="4.00" uops_retire_slots_indexed="15" uops_MITE_indexed="3" uops_MS_indexed="12" complex_decoder_indexed="1" available_simple_decoders_indexed="0" TP_unrolled_indexed="12.91" TP_loop_indexed="13.00" uops_indexed="14" ports_indexed="4*p0+1*p01+2*p06+6*p15+1*p23A" TP_ports_indexed="4.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="AMT">
        <measurement uops_MS="4" complex_decoder="1" TP_unrolled="8.00" TP_loop="8.00" uops="4">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="GLM">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="GLP">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TRM">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
  </extension>
  <extension name="AMD_INVLPGB">
    <instruction asm="INVLPGB" category="SYSTEM" cpl="0" extension="AMD_INVLPGB" iclass="INVLPGB" iform="INVLPGB_RAX_EDX_ECX" isa-set="AMD_INVLPGB" string="INVLPGB (RAX, EDX, ECX)" url="uops.info/html-instr/INVLPGB_RAX_EDX_ECX.html">
      <operand idx="1" implicit="1" name="REG0" r="1" type="reg">RAX</operand>
      <operand idx="2" implicit="1" name="REG1" r="1" type="reg">EDX</operand>
      <operand idx="3" implicit="1" name="REG2" r="1" type="reg">ECX</operand>
      <architecture name="ZEN3">
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="TLBSYNC" category="SYSTEM" cpl="0" extension="AMD_INVLPGB" iclass="TLBSYNC" iform="TLBSYNC" isa-set="AMD_INVLPGB" string="TLBSYNC" url="uops.info/html-instr/TLBSYNC.html">
      <architecture name="ZEN3">
        <doc uops="ucode"/>
      </architecture>
    </instruction>
  </extension>
  <extension name="AMX_BF16">
    <instruction asm="TDPBF16PS" category="AMX_TILE" cpl="3" extension="AMX_BF16" iclass="TDPBF16PS" iform="TDPBF16PS_TMMf32_TMMu32_TMMu32" isa-set="AMX_BF16" string="TDPBF16PS (MM, MM, MM)" vex="1" url="uops.info/html-instr/TDPBF16PS_MM_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="0" xtype="f32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
  </extension>
  <extension name="AMX_FP16">
    <instruction asm="TDPFP16PS" category="AMX_TILE" cpl="3" extension="AMX_FP16" iclass="TDPFP16PS" iform="TDPFP16PS_TMMf32_TMM2f16_TMM2f16" isa-set="AMX_FP16" string="TDPFP16PS (MM, MM, MM)" vex="1" url="uops.info/html-instr/TDPFP16PS_MM_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="0" xtype="f32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="0" xtype="2f16">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="0" xtype="2f16">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
  </extension>
  <extension name="AMX_INT8">
    <instruction asm="TDPBSSD" category="AMX_TILE" cpl="3" extension="AMX_INT8" iclass="TDPBSSD" iform="TDPBSSD_TMMi32_TMMu32_TMMu32" isa-set="AMX_INT8" string="TDPBSSD (MM, MM, MM)" vex="1" url="uops.info/html-instr/TDPBSSD_MM_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="0" xtype="i32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
    <instruction asm="TDPBSUD" category="AMX_TILE" cpl="3" extension="AMX_INT8" iclass="TDPBSUD" iform="TDPBSUD_TMMi32_TMMu32_TMMu32" isa-set="AMX_INT8" string="TDPBSUD (MM, MM, MM)" vex="1" url="uops.info/html-instr/TDPBSUD_MM_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="0" xtype="i32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
    <instruction asm="TDPBUSD" category="AMX_TILE" cpl="3" extension="AMX_INT8" iclass="TDPBUSD" iform="TDPBUSD_TMMi32_TMMu32_TMMu32" isa-set="AMX_INT8" string="TDPBUSD (MM, MM, MM)" vex="1" url="uops.info/html-instr/TDPBUSD_MM_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="0" xtype="i32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
    <instruction asm="TDPBUUD" category="AMX_TILE" cpl="3" extension="AMX_INT8" iclass="TDPBUUD" iform="TDPBUUD_TMMu32_TMMu32_TMMu32" isa-set="AMX_INT8" string="TDPBUUD (MM, MM, MM)" vex="1" url="uops.info/html-instr/TDPBUUD_MM_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
  </extension>
  <extension name="AMX_TILE">
    <instruction asm="LDTILECFG" category="AMX_TILE" cpl="3" extension="AMX_TILE" iclass="LDTILECFG" iform="LDTILECFG_MEM" isa-set="AMX_TILE" string="LDTILECFG (M512)" vex="1" url="uops.info/html-instr/LDTILECFG_M512.html">
      <operand idx="1" memory-prefix="zmmword ptr" name="MEM0" r="1" type="mem" width="512" xtype="struct"/>
    </instruction>
    <instruction asm="STTILECFG" category="AMX_TILE" cpl="3" extension="AMX_TILE" iclass="STTILECFG" iform="STTILECFG_MEM" isa-set="AMX_TILE" string="STTILECFG (M512)" vex="1" url="uops.info/html-instr/STTILECFG_M512.html">
      <operand idx="1" memory-prefix="zmmword ptr" name="MEM0" type="mem" w="1" width="512" xtype="struct"/>
    </instruction>
    <instruction asm="TILELOADD" category="AMX_TILE" cpl="3" extension="AMX_TILE" iclass="TILELOADD" iform="TILELOADD_TMMu32_MEMu32" isa-set="AMX_TILE" string="TILELOADD (MM, M0)" vex="1" url="uops.info/html-instr/TILELOADD_MM_M0.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="MEM0" r="1" type="mem" width="0" xtype="u32"/>
    </instruction>
    <instruction asm="TILELOADDT1" category="AMX_TILE" cpl="3" extension="AMX_TILE" iclass="TILELOADDT1" iform="TILELOADDT1_TMMu32_MEMu32" isa-set="AMX_TILE" string="TILELOADDT1 (MM, M0)" vex="1" url="uops.info/html-instr/TILELOADDT1_MM_M0.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="MEM0" r="1" type="mem" width="0" xtype="u32"/>
    </instruction>
    <instruction asm="TILERELEASE" category="AMX_TILE" cpl="3" extension="AMX_TILE" iclass="TILERELEASE" iform="TILERELEASE" isa-set="AMX_TILE" string="TILERELEASE" vex="1" url="uops.info/html-instr/TILERELEASE.html"/>
    <instruction asm="TILESTORED" category="AMX_TILE" cpl="3" extension="AMX_TILE" iclass="TILESTORED" iform="TILESTORED_MEMu32_TMMu32" isa-set="AMX_TILE" string="TILESTORED (M0, MM)" vex="1" url="uops.info/html-instr/TILESTORED_M0_MM.html">
      <operand idx="1" name="MEM0" type="mem" w="1" width="0" xtype="u32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
    <instruction asm="TILEZERO" category="AMX_TILE" cpl="3" extension="AMX_TILE" iclass="TILEZERO" iform="TILEZERO_TMMu32" isa-set="AMX_TILE" string="TILEZERO (MM)" vex="1" url="uops.info/html-instr/TILEZERO_MM.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
  </extension>
  <extension name="AVX">
    <instruction asm="VADDPD" category="AVX" cpl="3" extension="AVX" iclass="VADDPD" iform="VADDPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VADDPD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VADDPD_XMM_XMM_M128.html" summary="Add Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ADDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDPD" category="AVX" cpl="3" extension="AVX" iclass="VADDPD" iform="VADDPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VADDPD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VADDPD_XMM_XMM_XMM.html" summary="Add Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ADDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDPD" category="AVX" cpl="3" extension="AVX" iclass="VADDPD" iform="VADDPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VADDPD (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VADDPD_YMM_YMM_M256.html" summary="Add Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ADDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.54" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDPD" category="AVX" cpl="3" extension="AVX" iclass="VADDPD" iform="VADDPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VADDPD (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VADDPD_YMM_YMM_YMM.html" summary="Add Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ADDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP2/3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDPS" category="AVX" cpl="3" extension="AVX" iclass="VADDPS" iform="VADDPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VADDPS (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VADDPS_XMM_XMM_M128.html" summary="Add Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ADDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDPS" category="AVX" cpl="3" extension="AVX" iclass="VADDPS" iform="VADDPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VADDPS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VADDPS_XMM_XMM_XMM.html" summary="Add Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ADDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDPS" category="AVX" cpl="3" extension="AVX" iclass="VADDPS" iform="VADDPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VADDPS (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VADDPS_YMM_YMM_M256.html" summary="Add Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ADDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.52" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.56" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDPS" category="AVX" cpl="3" extension="AVX" iclass="VADDPS" iform="VADDPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VADDPS (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VADDPS_YMM_YMM_YMM.html" summary="Add Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ADDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP2/3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSD" category="AVX" cpl="3" extension="AVX" iclass="VADDSD" iform="VADDSD_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VADDSD (XMM, XMM, M64)" vex="1" url="uops.info/html-instr/VADDSD_XMM_XMM_M64.html" summary="Add Scalar Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ADDSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.54" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSD" category="AVX" cpl="3" extension="AVX" iclass="VADDSD" iform="VADDSD_XMMdq_XMMdq_XMMq" isa-set="AVX" mxcsr="1" string="VADDSD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VADDSD_XMM_XMM_XMM.html" summary="Add Scalar Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ADDSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSS" category="AVX" cpl="3" extension="AVX" iclass="VADDSS" iform="VADDSS_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VADDSS (XMM, XMM, M32)" vex="1" url="uops.info/html-instr/VADDSS_XMM_XMM_M32.html" summary="Add Scalar Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ADDSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSS" category="AVX" cpl="3" extension="AVX" iclass="VADDSS" iform="VADDSS_XMMdq_XMMdq_XMMd" isa-set="AVX" mxcsr="1" string="VADDSS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VADDSS_XMM_XMM_XMM.html" summary="Add Scalar Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ADDSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPD" iform="VADDSUBPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VADDSUBPD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VADDSUBPD_XMM_XMM_M128.html" summary="Packed Double-FP Add/Subtract" url-ref="felixcloutier.com/x86/ADDSUBPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPD" iform="VADDSUBPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VADDSUBPD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VADDSUBPD_XMM_XMM_XMM.html" summary="Packed Double-FP Add/Subtract" url-ref="felixcloutier.com/x86/ADDSUBPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPD" iform="VADDSUBPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VADDSUBPD (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VADDSUBPD_YMM_YMM_M256.html" summary="Packed Double-FP Add/Subtract" url-ref="felixcloutier.com/x86/ADDSUBPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPD" iform="VADDSUBPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VADDSUBPD (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VADDSUBPD_YMM_YMM_YMM.html" summary="Packed Double-FP Add/Subtract" url-ref="felixcloutier.com/x86/ADDSUBPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP2/3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPS" iform="VADDSUBPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VADDSUBPS (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VADDSUBPS_XMM_XMM_M128.html" summary="Packed Single-FP Add/Subtract" url-ref="felixcloutier.com/x86/ADDSUBPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.54" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPS" iform="VADDSUBPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VADDSUBPS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VADDSUBPS_XMM_XMM_XMM.html" summary="Packed Single-FP Add/Subtract" url-ref="felixcloutier.com/x86/ADDSUBPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPS" iform="VADDSUBPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VADDSUBPS (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VADDSUBPS_YMM_YMM_M256.html" summary="Packed Single-FP Add/Subtract" url-ref="felixcloutier.com/x86/ADDSUBPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPS" iform="VADDSUBPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VADDSUBPS (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VADDSUBPS_YMM_YMM_YMM.html" summary="Packed Single-FP Add/Subtract" url-ref="felixcloutier.com/x86/ADDSUBPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP2/3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDNPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPD" iform="VANDNPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VANDNPD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VANDNPD_XMM_XMM_M128.html" summary="Bitwise Logical AND NOT of Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDNPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.35" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDNPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPD" iform="VANDNPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VANDNPD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VANDNPD_XMM_XMM_XMM.html" summary="Bitwise Logical AND NOT of Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDNPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.25" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDNPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPD" iform="VANDNPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VANDNPD (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VANDNPD_YMM_YMM_M256.html" summary="Bitwise Logical AND NOT of Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDNPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="u64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.42" TP_loop="0.40" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.52" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDNPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPD" iform="VANDNPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VANDNPD (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VANDNPD_YMM_YMM_YMM.html" summary="Bitwise Logical AND NOT of Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDNPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDNPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPS" iform="VANDNPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VANDNPS (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VANDNPS_XMM_XMM_M128.html" summary="Bitwise Logical AND NOT of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDNPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.36" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDNPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPS" iform="VANDNPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VANDNPS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VANDNPS_XMM_XMM_XMM.html" summary="Bitwise Logical AND NOT of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDNPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.25" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDNPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPS" iform="VANDNPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VANDNPS (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VANDNPS_YMM_YMM_M256.html" summary="Bitwise Logical AND NOT of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDNPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.42" TP_loop="0.40" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDNPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPS" iform="VANDNPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VANDNPS (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VANDNPS_YMM_YMM_YMM.html" summary="Bitwise Logical AND NOT of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDNPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPD" iform="VANDPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VANDPD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VANDPD_XMM_XMM_M128.html" summary="Bitwise Logical AND of Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.35" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPD" iform="VANDPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VANDPD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VANDPD_XMM_XMM_XMM.html" summary="Bitwise Logical AND of Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPD" iform="VANDPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VANDPD (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VANDPD_YMM_YMM_M256.html" summary="Bitwise Logical AND of Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="u64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.42" TP_loop="0.40" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPD" iform="VANDPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VANDPD (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VANDPD_YMM_YMM_YMM.html" summary="Bitwise Logical AND of Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPS" iform="VANDPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VANDPS (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VANDPS_XMM_XMM_M128.html" summary="Bitwise Logical AND of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPS" iform="VANDPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VANDPS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VANDPS_XMM_XMM_XMM.html" summary="Bitwise Logical AND of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPS" iform="VANDPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VANDPS (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VANDPS_YMM_YMM_M256.html" summary="Bitwise Logical AND of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.42" TP_loop="0.40" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPS" iform="VANDPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VANDPS (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VANDPS_YMM_YMM_YMM.html" summary="Bitwise Logical AND of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ANDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPD" iform="VBLENDPD_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VBLENDPD (XMM, XMM, M128, I8)" vex="1" url="uops.info/html-instr/VBLENDPD_XMM_XMM_M128_I8.html" summary="Blend Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p05+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p05+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="0.97" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="0.97" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="0.33" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPD" iform="VBLENDPD_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VBLENDPD (XMM, XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VBLENDPD_XMM_XMM_XMM_I8.html" summary="Blend Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.34" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.37" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPD" iform="VBLENDPD_YMMqq_YMMqq_MEMqq_IMMb" isa-set="AVX" string="VBLENDPD (YMM, YMM, M256, I8)" vex="1" url="uops.info/html-instr/VBLENDPD_YMM_YMM_M256_I8.html" summary="Blend Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p05+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p05+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="8" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.98" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.40" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="0.40" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPD" iform="VBLENDPD_YMMqq_YMMqq_YMMqq_IMMb" isa-set="AVX" string="VBLENDPD (YMM, YMM, YMM, I8)" vex="1" url="uops.info/html-instr/VBLENDPD_YMM_YMM_YMM_I8.html" summary="Blend Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.34" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.37" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPS" iform="VBLENDPS_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VBLENDPS (XMM, XMM, M128, I8)" vex="1" url="uops.info/html-instr/VBLENDPS_XMM_XMM_M128_I8.html" summary="Blend Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p05+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p05+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="0.97" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="0.97" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="0.33" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPS" iform="VBLENDPS_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VBLENDPS (XMM, XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VBLENDPS_XMM_XMM_XMM_I8.html" summary="Blend Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.34" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.38" TP_loop="0.38" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.37" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.37" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPS" iform="VBLENDPS_YMMqq_YMMqq_MEMqq_IMMb" isa-set="AVX" string="VBLENDPS (YMM, YMM, M256, I8)" vex="1" url="uops.info/html-instr/VBLENDPS_YMM_YMM_M256_I8.html" summary="Blend Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p05+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p05+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="8" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.98" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.40" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="0.40" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPS" iform="VBLENDPS_YMMqq_YMMqq_YMMqq_IMMb" isa-set="AVX" string="VBLENDPS (YMM, YMM, YMM, I8)" vex="1" url="uops.info/html-instr/VBLENDPS_YMM_YMM_YMM_I8.html" summary="Blend Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.34" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.38" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.38" TP_loop="0.38" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.37" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.37" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPD" iform="VBLENDVPD_XMMdq_XMMdq_MEMdq_XMMdq" isa-set="AVX" string="VBLENDVPD (XMM, XMM, M128, XMM)" vex="1" url="uops.info/html-instr/VBLENDVPD_XMM_XMM_M128_XMM.html" summary="Variable Blend Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDVPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <operand idx="4" name="REG2" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="8" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="8" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="8" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.95" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="0.95" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="2.00" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.95" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="0.95" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="2.00" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="3*p015+1*p23A" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="3.55" TP_loop="3.17" uops="4">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPD" iform="VBLENDVPD_XMMdq_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VBLENDVPD (XMM, XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VBLENDVPD_XMM_XMM_XMM_XMM.html" summary="Variable Blend Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDVPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="REG3" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="2" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.90" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.90" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <IACA version="3.0" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <IACA version="3.0" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="3*p015" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
          <latency start_op="4" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="3.56" TP_loop="3.23" uops="4">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
          <latency start_op="4" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPD" iform="VBLENDVPD_YMMqq_YMMqq_MEMqq_YMMqq" isa-set="AVX" string="VBLENDVPD (YMM, YMM, M256, YMM)" vex="1" url="uops.info/html-instr/VBLENDVPD_YMM_YMM_M256_YMM.html" summary="Variable Blend Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDVPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <operand idx="4" name="REG2" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="9" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.95" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="0.95" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="2.00" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.95" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="0.95" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="2.00" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="3*p015+1*p23A" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="6" complex_decoder="1" TP_unrolled="3.57" TP_loop="3.18" uops="8">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPD" iform="VBLENDVPD_YMMqq_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VBLENDVPD (YMM, YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VBLENDVPD_YMM_YMM_YMM_YMM.html" summary="Variable Blend Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDVPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="REG3" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="2" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.90" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.90" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <IACA version="3.0" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <IACA version="3.0" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="3*p015" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
          <latency start_op="4" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="6" complex_decoder="1" TP_unrolled="3.92" TP_loop="3.86" uops="8">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
          <latency start_op="4" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPS" iform="VBLENDVPS_XMMdq_XMMdq_MEMdq_XMMdq" isa-set="AVX" string="VBLENDVPS (XMM, XMM, M128, XMM)" vex="1" url="uops.info/html-instr/VBLENDVPS_XMM_XMM_M128_XMM.html" summary="Variable Blend Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDVPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <operand idx="4" name="REG2" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="8" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="8" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="8" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.95" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="0.95" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="2.00" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.95" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="0.95" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="2.00" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="3*p015+1*p23A" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="3.53" TP_loop="3.17" uops="4">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPS" iform="VBLENDVPS_XMMdq_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VBLENDVPS (XMM, XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VBLENDVPS_XMM_XMM_XMM_XMM.html" summary="Variable Blend Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDVPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="REG3" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="2" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.90" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.90" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <IACA version="3.0" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <IACA version="3.0" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="3*p015" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
          <latency start_op="4" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="3.56" TP_loop="3.23" uops="4">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
          <latency start_op="4" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPS" iform="VBLENDVPS_YMMqq_YMMqq_MEMqq_YMMqq" isa-set="AVX" string="VBLENDVPS (YMM, YMM, M256, YMM)" vex="1" url="uops.info/html-instr/VBLENDVPS_YMM_YMM_M256_YMM.html" summary="Variable Blend Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDVPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <operand idx="4" name="REG2" r="1" type="reg" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="9" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.95" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="0.95" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="2.00" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.95" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="0.95" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="2.00" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="3*p015+1*p23A" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="6" complex_decoder="1" TP_unrolled="3.55" TP_loop="3.18" uops="8">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPS" iform="VBLENDVPS_YMMqq_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VBLENDVPS (YMM, YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VBLENDVPS_YMM_YMM_YMM_YMM.html" summary="Variable Blend Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/BLENDVPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="REG3" r="1" type="reg" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="2*p05" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="2" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.90" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.90" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <IACA version="3.0" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <IACA version="3.0" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="3*p015" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
          <latency start_op="4" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="6" complex_decoder="1" TP_unrolled="3.90" TP_loop="3.86" uops="8">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
          <latency start_op="4" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBROADCASTF128" category="BROADCAST" cpl="3" extension="AVX" iclass="VBROADCASTF128" iform="VBROADCASTF128_YMMqq_MEMdq" isa-set="AVX" string="VBROADCASTF128 (YMM, M128)" vex="1" url="uops.info/html-instr/VBROADCASTF128_YMM_M128.html" summary="Load with Broadcast Floating-Point Data" url-ref="felixcloutier.com/x86/VBROADCAST.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="LD, FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="LD, FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="LD" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBROADCASTSD" category="BROADCAST" cpl="3" extension="AVX" iclass="VBROADCASTSD" iform="VBROADCASTSD_YMMqq_MEMq" isa-set="AVX" string="VBROADCASTSD (YMM, M64)" vex="1" url="uops.info/html-instr/VBROADCASTSD_YMM_M64.html" summary="Load with Broadcast Floating-Point Data" url-ref="felixcloutier.com/x86/VBROADCAST.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="LD, FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="LD, FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="LD" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBROADCASTSS" category="BROADCAST" cpl="3" extension="AVX" iclass="VBROADCASTSS" iform="VBROADCASTSS_XMMdq_MEMd" isa-set="AVX" string="VBROADCASTSS (XMM, M32)" vex="1" url="uops.info/html-instr/VBROADCASTSS_XMM_M32.html" summary="Load with Broadcast Floating-Point Data" url-ref="felixcloutier.com/x86/VBROADCAST.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="LD" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="LD" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="LD" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBROADCASTSS" category="BROADCAST" cpl="3" extension="AVX" iclass="VBROADCASTSS" iform="VBROADCASTSS_YMMqq_MEMd" isa-set="AVX" string="VBROADCASTSS (YMM, M32)" vex="1" url="uops.info/html-instr/VBROADCASTSS_YMM_M32.html" summary="Load with Broadcast Floating-Point Data" url-ref="felixcloutier.com/x86/VBROADCAST.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="LD, FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="LD, FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="LD" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPPD" category="AVX" cpl="3" extension="AVX" iclass="VCMPPD" iform="VCMPPD_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPD (XMM, XMM, M128, I8)" vex="1" url="uops.info/html-instr/VCMPPD_XMM_XMM_M128_I8.html" summary="Compare Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CMPPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPPD" category="AVX" cpl="3" extension="AVX" iclass="VCMPPD" iform="VCMPPD_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPD (XMM, XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VCMPPD_XMM_XMM_XMM_I8.html" summary="Compare Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CMPPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPPD" category="AVX" cpl="3" extension="AVX" iclass="VCMPPD" iform="VCMPPD_YMMqq_YMMqq_MEMqq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPD (YMM, YMM, M256, I8)" vex="1" url="uops.info/html-instr/VCMPPD_YMM_YMM_M256_I8.html" summary="Compare Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CMPPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPPD" category="AVX" cpl="3" extension="AVX" iclass="VCMPPD" iform="VCMPPD_YMMqq_YMMqq_YMMqq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPD (YMM, YMM, YMM, I8)" vex="1" url="uops.info/html-instr/VCMPPD_YMM_YMM_YMM_I8.html" summary="Compare Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CMPPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPPS" category="AVX" cpl="3" extension="AVX" iclass="VCMPPS" iform="VCMPPS_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPS (XMM, XMM, M128, I8)" vex="1" url="uops.info/html-instr/VCMPPS_XMM_XMM_M128_I8.html" summary="Compare Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CMPPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPPS" category="AVX" cpl="3" extension="AVX" iclass="VCMPPS" iform="VCMPPS_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPS (XMM, XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VCMPPS_XMM_XMM_XMM_I8.html" summary="Compare Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CMPPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPPS" category="AVX" cpl="3" extension="AVX" iclass="VCMPPS" iform="VCMPPS_YMMqq_YMMqq_MEMqq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPS (YMM, YMM, M256, I8)" vex="1" url="uops.info/html-instr/VCMPPS_YMM_YMM_M256_I8.html" summary="Compare Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CMPPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPPS" category="AVX" cpl="3" extension="AVX" iclass="VCMPPS" iform="VCMPPS_YMMqq_YMMqq_YMMqq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPS (YMM, YMM, YMM, I8)" vex="1" url="uops.info/html-instr/VCMPPS_YMM_YMM_YMM_I8.html" summary="Compare Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CMPPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPSD" category="AVX" cpl="3" extension="AVX" iclass="VCMPSD" iform="VCMPSD_XMMdq_XMMdq_MEMq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPSD (XMM, XMM, M64, I8)" vex="1" url="uops.info/html-instr/VCMPSD_XMM_XMM_M64_I8.html" summary="Compare Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CMPSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPSD" category="AVX" cpl="3" extension="AVX" iclass="VCMPSD" iform="VCMPSD_XMMdq_XMMdq_XMMq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPSD (XMM, XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VCMPSD_XMM_XMM_XMM_I8.html" summary="Compare Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CMPSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPSS" category="AVX" cpl="3" extension="AVX" iclass="VCMPSS" iform="VCMPSS_XMMdq_XMMdq_MEMd_IMMb" isa-set="AVX" mxcsr="1" string="VCMPSS (XMM, XMM, M32, I8)" vex="1" url="uops.info/html-instr/VCMPSS_XMM_XMM_M32_I8.html" summary="Compare Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CMPSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPSS" category="AVX" cpl="3" extension="AVX" iclass="VCMPSS" iform="VCMPSS_XMMdq_XMMdq_XMMd_IMMb" isa-set="AVX" mxcsr="1" string="VCMPSS (XMM, XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VCMPSS_XMM_XMM_XMM_I8.html" summary="Compare Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CMPSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCOMISD" category="AVX" cpl="3" extension="AVX" iclass="VCOMISD" iform="VCOMISD_XMMq_MEMq" isa-set="AVX" mxcsr="1" string="VCOMISD (XMM, M64)" vex="1" url="uops.info/html-instr/VCOMISD_XMM_M64.html" summary="Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS" url-ref="felixcloutier.com/x86/COMISD.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="3" name="REG1" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="9" cycles_addr_index="9" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23A" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="3" cycles="9" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="3" cycles="7" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="10" cycles_addr_index="10" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="3" cycles="7" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="10" cycles_addr_index="10" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="3" cycles="6" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCOMISD" category="AVX" cpl="3" extension="AVX" iclass="VCOMISD" iform="VCOMISD_XMMq_XMMq" isa-set="AVX" mxcsr="1" string="VCOMISD (XMM, XMM)" vex="1" url="uops.info/html-instr/VCOMISD_XMM_XMM.html" summary="Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS" url-ref="felixcloutier.com/x86/COMISD.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="3" name="REG2" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="3" cycles="9" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
        <doc uops="2" ports="FP0/1, FP2" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="3" cycles="7" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1,FP2" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="3" cycles="7" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="FP2/3, FP4" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="3" cycles="6" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="6" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCOMISS" category="AVX" cpl="3" extension="AVX" iclass="VCOMISS" iform="VCOMISS_XMMd_MEMd" isa-set="AVX" mxcsr="1" string="VCOMISS (XMM, M32)" vex="1" url="uops.info/html-instr/VCOMISS_XMM_M32.html" summary="Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS" url-ref="felixcloutier.com/x86/COMISS.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="3" name="REG1" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="9" cycles_addr_index="9" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23A" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="8" cycles_addr_index="8" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="3" cycles="9" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="3" cycles="7" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="10" cycles_addr_index="10" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="3" cycles="7" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="10" cycles_addr_index="10" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="3" cycles="6" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCOMISS" category="AVX" cpl="3" extension="AVX" iclass="VCOMISS" iform="VCOMISS_XMMd_XMMd" isa-set="AVX" mxcsr="1" string="VCOMISS (XMM, XMM)" vex="1" url="uops.info/html-instr/VCOMISS_XMM_XMM.html" summary="Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS" url-ref="felixcloutier.com/x86/COMISS.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="3" name="REG2" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="1" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="3" cycles="9" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
        <doc uops="2" ports="FP0/1, FP2" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="3" cycles="7" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1,FP2" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="3" cycles="7" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="FP2/3, FP4" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="3" cycles="6" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="3" cycles="6" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PD" iform="VCVTDQ2PD_XMMdq_MEMq" isa-set="AVX" string="VCVTDQ2PD (XMM, M64)" vex="1" url="uops.info/html-instr/VCVTDQ2PD_XMM_M64.html" summary="Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTDQ2PD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="0" available_simple_decoders_indexed="0" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PD" iform="VCVTDQ2PD_XMMdq_XMMq" isa-set="AVX" string="VCVTDQ2PD (XMM, XMM)" vex="1" url="uops.info/html-instr/VCVTDQ2PD_XMM_XMM.html" summary="Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTDQ2PD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
        <doc TP="1.0" latency="5.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP1/2, FP3" latency="7" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP1/2, FP3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PD" iform="VCVTDQ2PD_YMMqq_MEMdq" isa-set="AVX" string="VCVTDQ2PD (YMM, M128)" vex="1" url="uops.info/html-instr/VCVTDQ2PD_YMM_M128.html" summary="Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTDQ2PD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.54" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="7">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PD" iform="VCVTDQ2PD_YMMqq_XMMdq" isa-set="AVX" string="VCVTDQ2PD (YMM, XMM)" vex="1" url="uops.info/html-instr/VCVTDQ2PD_YMM_XMM.html" summary="Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTDQ2PD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
        <doc TP="1.0" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP01+1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.67" TP_loop="0.67" uops="2" ports="1*FP12+1*FP23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PS" iform="VCVTDQ2PS_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VCVTDQ2PS (XMM, M128)" vex="1" url="uops.info/html-instr/VCVTDQ2PS_XMM_M128.html" summary="Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTDQ2PS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PS" iform="VCVTDQ2PS_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VCVTDQ2PS (XMM, XMM)" vex="1" url="uops.info/html-instr/VCVTDQ2PS_XMM_XMM.html" summary="Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTDQ2PS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PS" iform="VCVTDQ2PS_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VCVTDQ2PS (YMM, M256)" vex="1" url="uops.info/html-instr/VCVTDQ2PS_YMM_M256.html" summary="Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTDQ2PS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.52" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PS" iform="VCVTDQ2PS_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VCVTDQ2PS (YMM, YMM)" vex="1" url="uops.info/html-instr/VCVTDQ2PS_YMM_YMM.html" summary="Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTDQ2PS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3" latency="4" TP="2.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2DQ" iform="VCVTPD2DQ_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VCVTPD2DQ (XMM, M128)" vex="1" url="uops.info/html-instr/VCVTPD2DQ_XMM_M128.html" summary="Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url-ref="felixcloutier.com/x86/CVTPD2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2DQ" iform="VCVTPD2DQ_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VCVTPD2DQ (XMM, XMM)" vex="1" url="uops.info/html-instr/VCVTPD2DQ_XMM_XMM.html" summary="Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url-ref="felixcloutier.com/x86/CVTPD2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.89" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.89" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
        <doc latency="5.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3, FP1/2" latency="7" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP3, FP1/2" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2DQ" iform="VCVTPD2DQ_XMMdq_MEMqq" isa-set="AVX" mxcsr="1" string="VCVTPD2DQ (XMM, M256)" vex="1" url="uops.info/html-instr/VCVTPD2DQ_XMM_M256.html" summary="Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url-ref="felixcloutier.com/x86/CVTPD2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="0" available_simple_decoders_indexed="0" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.08" TP_unrolled="1.08" uops="4">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP01+1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.40" TP_loop="0.67" uops="2" ports="1*FP12+1*FP23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2DQ" iform="VCVTPD2DQ_XMMdq_YMMqq" isa-set="AVX" mxcsr="1" string="VCVTPD2DQ (XMM, YMM)" vex="1" url="uops.info/html-instr/VCVTPD2DQ_XMM_YMM.html" summary="Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url-ref="felixcloutier.com/x86/CVTPD2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.84" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.84" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
        <doc TP="1.0" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP01+1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.85" TP_loop="0.67" uops="2" ports="1*FP12+1*FP23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2PS" iform="VCVTPD2PS_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VCVTPD2PS (XMM, M128)" vex="1" url="uops.info/html-instr/VCVTPD2PS_XMM_M128.html" summary="Convert Packed Double-Precision Floating-Point Values to Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTPD2PS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2PS" iform="VCVTPD2PS_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VCVTPD2PS (XMM, XMM)" vex="1" url="uops.info/html-instr/VCVTPD2PS_XMM_XMM.html" summary="Convert Packed Double-Precision Floating-Point Values to Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTPD2PS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.89" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.89" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
        <doc latency="5.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="6" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2PS" iform="VCVTPD2PS_XMMdq_MEMqq" isa-set="AVX" mxcsr="1" string="VCVTPD2PS (XMM, M256)" vex="1" url="uops.info/html-instr/VCVTPD2PS_XMM_M256.html" summary="Convert Packed Double-Precision Floating-Point Values to Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTPD2PS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP01+1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.40" TP_loop="0.67" uops="2" ports="1*FP12+1*FP23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2PS" iform="VCVTPD2PS_XMMdq_YMMqq" isa-set="AVX" mxcsr="1" string="VCVTPD2PS (XMM, YMM)" vex="1" url="uops.info/html-instr/VCVTPD2PS_XMM_YMM.html" summary="Convert Packed Double-Precision Floating-Point Values to Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTPD2PS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.84" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.84" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
        <doc TP="1.0" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP01+1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.85" TP_loop="0.67" uops="2" ports="1*FP12+1*FP23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2DQ" iform="VCVTPS2DQ_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VCVTPS2DQ (XMM, M128)" vex="1" url="uops.info/html-instr/VCVTPS2DQ_XMM_M128.html" summary="Convert Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url-ref="felixcloutier.com/x86/CVTPS2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2DQ" iform="VCVTPS2DQ_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VCVTPS2DQ (XMM, XMM)" vex="1" url="uops.info/html-instr/VCVTPS2DQ_XMM_XMM.html" summary="Convert Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url-ref="felixcloutier.com/x86/CVTPS2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2DQ" iform="VCVTPS2DQ_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VCVTPS2DQ (YMM, M256)" vex="1" url="uops.info/html-instr/VCVTPS2DQ_YMM_M256.html" summary="Convert Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url-ref="felixcloutier.com/x86/CVTPS2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.54" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2DQ" iform="VCVTPS2DQ_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VCVTPS2DQ (YMM, YMM)" vex="1" url="uops.info/html-instr/VCVTPS2DQ_YMM_YMM.html" summary="Convert Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url-ref="felixcloutier.com/x86/CVTPS2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3, FP3" latency="4" TP="2.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP3, FP3" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2PD" iform="VCVTPS2PD_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VCVTPS2PD (XMM, M64)" vex="1" url="uops.info/html-instr/VCVTPS2PD_XMM_M64.html" summary="Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTPS2PD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.52" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" complex_decoder="1" available_simple_decoders="5" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="5" TP_unrolled_indexed="0.88" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2PD" iform="VCVTPS2PD_XMMdq_XMMq" isa-set="AVX" mxcsr="1" string="VCVTPS2PD (XMM, XMM)" vex="1" url="uops.info/html-instr/VCVTPS2PD_XMM_XMM.html" summary="Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTPS2PD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.96" uops="2" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.96" uops="2" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.48" uops="2" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.48" uops="2" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
        <doc latency="5.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2PD" iform="VCVTPS2PD_YMMqq_MEMdq" isa-set="AVX" mxcsr="1" string="VCVTPS2PD (YMM, M128)" vex="1" url="uops.info/html-instr/VCVTPS2PD_YMM_M128.html" summary="Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTPS2PD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.52" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.52" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" complex_decoder="1" available_simple_decoders="5" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="5" TP_unrolled_indexed="0.89" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2PD" iform="VCVTPS2PD_YMMqq_XMMdq" isa-set="AVX" mxcsr="1" string="VCVTPS2PD (YMM, XMM)" vex="1" url="uops.info/html-instr/VCVTPS2PD_YMM_XMM.html" summary="Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/CVTPS2PD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.48" uops="2" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.48" uops="2" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
        <doc TP="1.0" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP01+1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.67" TP_loop="0.67" uops="2" ports="1*FP12+1*FP23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSD2SI" iform="VCVTSD2SI_GPR32d_MEMq" isa-set="AVX" mxcsr="1" string="VCVTSD2SI (R32, M64)" vex="1" url="uops.info/html-instr/VCVTSD2SI_R32_M64.html" summary="Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer" url-ref="felixcloutier.com/x86/CVTSD2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23A" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23A" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSD2SI" iform="VCVTSD2SI_GPR32d_XMMq" isa-set="AVX" mxcsr="1" string="VCVTSD2SI (R32, XMM)" vex="1" url="uops.info/html-instr/VCVTSD2SI_R32_XMM.html" summary="Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer" url-ref="felixcloutier.com/x86/CVTSD2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.90" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.90" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3, FP2" latency="8" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3, FP2" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="FP2/3, FP4" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSD2SI" iform="VCVTSD2SI_GPR64q_MEMq" isa-set="AVX" mxcsr="1" string="VCVTSD2SI (R64, M64)" vex="1" url="uops.info/html-instr/VCVTSD2SI_R64_M64.html" summary="Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer" url-ref="felixcloutier.com/x86/CVTSD2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23A" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23A" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSD2SI" iform="VCVTSD2SI_GPR64q_XMMq" isa-set="AVX" mxcsr="1" string="VCVTSD2SI (R64, XMM)" vex="1" url="uops.info/html-instr/VCVTSD2SI_R64_XMM.html" summary="Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer" url-ref="felixcloutier.com/x86/CVTSD2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.90" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.90" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3, FP2" latency="8" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3, FP2" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="FP2/3, FP4" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSD2SS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSD2SS" iform="VCVTSD2SS_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VCVTSD2SS (XMM, XMM, M64)" vex="1" url="uops.info/html-instr/VCVTSD2SS_XMM_XMM_M64.html" summary="Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CVTSD2SS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSD2SS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSD2SS" iform="VCVTSD2SS_XMMdq_XMMdq_XMMq" isa-set="AVX" mxcsr="1" string="VCVTSD2SS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VCVTSD2SS_XMM_XMM_XMM.html" summary="Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CVTSD2SS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.89" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.89" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SD" iform="VCVTSI2SD_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VCVTSI2SD (XMM, XMM, M32)" vex="1" url="uops.info/html-instr/VCVTSI2SD_XMM_XMM_M32.html" summary="Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CVTSI2SD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SD" iform="VCVTSI2SD_XMMdq_XMMdq_GPR32d" isa-set="AVX" mxcsr="1" string="VCVTSI2SD (XMM, XMM, R32)" vex="1" url="uops.info/html-instr/VCVTSI2SD_XMM_XMM_R32.html" summary="Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CVTSI2SD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.48" uops="2" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.48" uops="2" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="ALU2, FP3" latency="7" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="ALU2, FP3" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="ALU,FP2/3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.25" TP_loop="1.25" uops="2" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SD" iform="VCVTSI2SD_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VCVTSI2SD (XMM, XMM, M64)" vex="1" url="uops.info/html-instr/VCVTSI2SD_XMM_XMM_M64.html" summary="Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CVTSI2SD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SD" iform="VCVTSI2SD_XMMdq_XMMdq_GPR64q" isa-set="AVX" mxcsr="1" string="VCVTSI2SD (XMM, XMM, R64)" vex="1" url="uops.info/html-instr/VCVTSI2SD_XMM_XMM_R64.html" summary="Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CVTSI2SD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.48" uops="2" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.48" uops="2" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="ALU2, FP3" latency="7" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="ALU2, FP3" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="ALU,FP2/3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.25" TP_loop="1.25" uops="2" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SS" iform="VCVTSI2SS_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VCVTSI2SS (XMM, XMM, M32)" vex="1" url="uops.info/html-instr/VCVTSI2SS_XMM_XMM_M32.html" summary="Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CVTSI2SS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SS" iform="VCVTSI2SS_XMMdq_XMMdq_GPR32d" isa-set="AVX" mxcsr="1" string="VCVTSI2SS (XMM, XMM, R32)" vex="1" url="uops.info/html-instr/VCVTSI2SS_XMM_XMM_R32.html" summary="Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CVTSI2SS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.48" uops="2" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.48" uops="2" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="ALU2, FP3" latency="7" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="ALU2, FP3" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="ALU,FP2/3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.25" TP_loop="1.25" uops="2" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SS" iform="VCVTSI2SS_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VCVTSI2SS (XMM, XMM, M64)" vex="1" url="uops.info/html-instr/VCVTSI2SS_XMM_XMM_M64.html" summary="Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CVTSI2SS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SS" iform="VCVTSI2SS_XMMdq_XMMdq_GPR64q" isa-set="AVX" mxcsr="1" string="VCVTSI2SS (XMM, XMM, R64)" vex="1" url="uops.info/html-instr/VCVTSI2SS_XMM_XMM_R64.html" summary="Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CVTSI2SS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.94" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.94" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p015+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.93" uops="3" ports="1*p015+2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p015+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.93" uops="3" ports="1*p015+2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="ALU2, FP3" latency="7" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="ALU2, FP3" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="ALU,FP2/3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.25" TP_loop="1.25" uops="2" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSS2SD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSS2SD" iform="VCVTSS2SD_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VCVTSS2SD (XMM, XMM, M32)" vex="1" url="uops.info/html-instr/VCVTSS2SD_XMM_XMM_M32.html" summary="Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CVTSS2SD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSS2SD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSS2SD" iform="VCVTSS2SD_XMMdq_XMMdq_XMMd" isa-set="AVX" mxcsr="1" string="VCVTSS2SD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VCVTSS2SD_XMM_XMM_XMM.html" summary="Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/CVTSS2SD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.96" uops="2" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.96" uops="2" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.48" uops="2" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.48" uops="2" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSS2SI" iform="VCVTSS2SI_GPR32d_MEMd" isa-set="AVX" mxcsr="1" string="VCVTSS2SI (R32, M32)" vex="1" url="uops.info/html-instr/VCVTSS2SI_R32_M32.html" summary="Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer" url-ref="felixcloutier.com/x86/CVTSS2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23A" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23A" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSS2SI" iform="VCVTSS2SI_GPR32d_XMMd" isa-set="AVX" mxcsr="1" string="VCVTSS2SI (R32, XMM)" vex="1" url="uops.info/html-instr/VCVTSS2SI_R32_XMM.html" summary="Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer" url-ref="felixcloutier.com/x86/CVTSS2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.90" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.90" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3, ALU0" latency="8" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3, ALU0" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="FP2/3, FP4" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSS2SI" iform="VCVTSS2SI_GPR64q_MEMd" isa-set="AVX" mxcsr="1" string="VCVTSS2SI (R64, M32)" vex="1" url="uops.info/html-instr/VCVTSS2SI_R64_M32.html" summary="Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer" url-ref="felixcloutier.com/x86/CVTSS2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23A" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23A" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSS2SI" iform="VCVTSS2SI_GPR64q_XMMd" isa-set="AVX" mxcsr="1" string="VCVTSS2SI (R64, XMM)" vex="1" url="uops.info/html-instr/VCVTSS2SI_R64_XMM.html" summary="Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer" url-ref="felixcloutier.com/x86/CVTSS2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.90" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.90" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3, ALU0" latency="8" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3, ALU0" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="FP2/3, FP4" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPD2DQ" iform="VCVTTPD2DQ_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VCVTTPD2DQ (XMM, M128)" vex="1" url="uops.info/html-instr/VCVTTPD2DQ_XMM_M128.html" summary="Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url-ref="felixcloutier.com/x86/CVTTPD2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPD2DQ" iform="VCVTTPD2DQ_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VCVTTPD2DQ (XMM, XMM)" vex="1" url="uops.info/html-instr/VCVTTPD2DQ_XMM_XMM.html" summary="Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url-ref="felixcloutier.com/x86/CVTTPD2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.89" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.89" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
        <doc latency="5.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3, FP1/2" latency="7" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP3, FP1/2" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPD2DQ" iform="VCVTTPD2DQ_XMMdq_MEMqq" isa-set="AVX" mxcsr="1" string="VCVTTPD2DQ (XMM, M256)" vex="1" url="uops.info/html-instr/VCVTTPD2DQ_XMM_M256.html" summary="Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url-ref="felixcloutier.com/x86/CVTTPD2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="0" available_simple_decoders_indexed="0" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p1+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p1+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p01+1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.08" TP_unrolled="1.08" uops="4">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP01+1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.67" TP_loop="0.85" uops="2" ports="1*FP12+1*FP23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPD2DQ" iform="VCVTTPD2DQ_XMMdq_YMMqq" isa-set="AVX" mxcsr="1" string="VCVTTPD2DQ (XMM, YMM)" vex="1" url="uops.info/html-instr/VCVTTPD2DQ_XMM_YMM.html" summary="Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url-ref="felixcloutier.com/x86/CVTTPD2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="4" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p1+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.84" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.84" uops="2" ports="1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
        <doc TP="1.0" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP01+1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.85" TP_loop="0.67" uops="2" ports="1*FP12+1*FP23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPS2DQ" iform="VCVTTPS2DQ_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VCVTTPS2DQ (XMM, M128)" vex="1" url="uops.info/html-instr/VCVTTPS2DQ_XMM_M128.html" summary="Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url-ref="felixcloutier.com/x86/CVTTPS2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.54" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.54" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPS2DQ" iform="VCVTTPS2DQ_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VCVTTPS2DQ (XMM, XMM)" vex="1" url="uops.info/html-instr/VCVTTPS2DQ_XMM_XMM.html" summary="Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url-ref="felixcloutier.com/x86/CVTTPS2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPS2DQ" iform="VCVTTPS2DQ_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VCVTTPS2DQ (YMM, M256)" vex="1" url="uops.info/html-instr/VCVTTPS2DQ_YMM_M256.html" summary="Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url-ref="felixcloutier.com/x86/CVTTPS2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPS2DQ" iform="VCVTTPS2DQ_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VCVTTPS2DQ (YMM, YMM)" vex="1" url="uops.info/html-instr/VCVTTPS2DQ_YMM_YMM.html" summary="Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url-ref="felixcloutier.com/x86/CVTTPS2DQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="2" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3" latency="4" TP="2.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP2/3" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSD2SI" iform="VCVTTSD2SI_GPR32d_MEMq" isa-set="AVX" mxcsr="1" string="VCVTTSD2SI (R32, M64)" vex="1" url="uops.info/html-instr/VCVTTSD2SI_R32_M64.html" summary="Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Integer" url-ref="felixcloutier.com/x86/CVTTSD2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23A" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23A" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSD2SI" iform="VCVTTSD2SI_GPR32d_XMMq" isa-set="AVX" mxcsr="1" string="VCVTTSD2SI (R32, XMM)" vex="1" url="uops.info/html-instr/VCVTTSD2SI_R32_XMM.html" summary="Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Integer" url-ref="felixcloutier.com/x86/CVTTSD2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.90" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.90" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3, ALU0" latency="8" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3, ALU0" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="FP2/3, FP4" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSD2SI" iform="VCVTTSD2SI_GPR64q_MEMq" isa-set="AVX" mxcsr="1" string="VCVTTSD2SI (R64, M64)" vex="1" url="uops.info/html-instr/VCVTTSD2SI_R64_M64.html" summary="Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Integer" url-ref="felixcloutier.com/x86/CVTTSD2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23A" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23A" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSD2SI" iform="VCVTTSD2SI_GPR64q_XMMq" isa-set="AVX" mxcsr="1" string="VCVTTSD2SI (R64, XMM)" vex="1" url="uops.info/html-instr/VCVTTSD2SI_R64_XMM.html" summary="Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Integer" url-ref="felixcloutier.com/x86/CVTTSD2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.90" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.90" uops="2" ports="1*p0+1*p015" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3, ALU0" latency="8" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3, ALU0" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="FP2/3, FP4" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSS2SI" iform="VCVTTSS2SI_GPR32d_MEMd" isa-set="AVX" mxcsr="1" string="VCVTTSS2SI (R32, M32)" vex="1" url="uops.info/html-instr/VCVTTSS2SI_R32_M32.html" summary="Convert with Truncation Scalar Single-Precision Floating-Point Value to Integer" url-ref="felixcloutier.com/x86/CVTTSS2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23A" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23A" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSS2SI" iform="VCVTTSS2SI_GPR32d_XMMd" isa-set="AVX" mxcsr="1" string="VCVTTSS2SI (R32, XMM)" vex="1" url="uops.info/html-instr/VCVTTSS2SI_R32_XMM.html" summary="Convert with Truncation Scalar Single-Precision Floating-Point Value to Integer" url-ref="felixcloutier.com/x86/CVTTSS2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p0+1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p0+1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p01" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3, ALU0" latency="8" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3, ALU0" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="FP2/3, FP4" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSS2SI" iform="VCVTTSS2SI_GPR64q_MEMd" isa-set="AVX" mxcsr="1" string="VCVTTSS2SI (R64, M32)" vex="1" url="uops.info/html-instr/VCVTTSS2SI_R64_M32.html" summary="Convert with Truncation Scalar Single-Precision Floating-Point Value to Integer" url-ref="felixcloutier.com/x86/CVTTSS2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.93" fusion_occurred="1" uops="3" ports="1*p0+1*p015+1*p23" TP_ports="1.00" TP_indexed="0.90" uops_indexed="3" ports_indexed="1*p0+1*p015+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p23A" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p01+1*p23A" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="12" cycles_addr_index="12" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSS2SI" iform="VCVTTSS2SI_GPR64q_XMMd" isa-set="AVX" mxcsr="1" string="VCVTTSS2SI (R64, XMM)" vex="1" url="uops.info/html-instr/VCVTTSS2SI_R64_XMM.html" summary="Convert with Truncation Scalar Single-Precision Floating-Point Value to Integer" url-ref="felixcloutier.com/x86/CVTTSS2SI.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.97" uops="2" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p0+1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p0+1*p015+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3, ALU0" latency="8" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3, ALU0" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="FP2/3, FP4" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP23+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVPD" category="AVX" cpl="3" extension="AVX" iclass="VDIVPD" iform="VDIVPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VDIVPD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VDIVPD_XMM_XMM_M128.html" summary="Divide Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DIVPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="21.00" fusion_occurred="1" latency="27" TP_no_interiteration="21.00" uops="2" div_cycles="21" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="21.00" fusion_occurred="1" TP_no_interiteration="21.00" uops="2" div_cycles="21" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="21.00" fusion_occurred="1" uops="2" div_cycles="21" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="22" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="28" max_cycles_addr_index="28" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="14.00" fusion_occurred="1" latency="26" TP_no_interiteration="14.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="14.00" fusion_occurred="1" TP_no_interiteration="14.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="14.00" fusion_occurred="1" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="11" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="26" max_cycles_addr_index="26" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="14.00" fusion_occurred="1" latency="26" TP_no_interiteration="14.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="14.00" fusion_occurred="1" TP_no_interiteration="14.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="14.00" fusion_occurred="1" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="14.00" fusion_occurred="1" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="9" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="26" max_cycles_addr_index="26" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="8.00" fusion_occurred="1" TP_no_interiteration="8.00" uops="2" div_cycles="8" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="8.00" fusion_occurred="1" uops="2" div_cycles="8" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="8.00" fusion_occurred="1" uops="2" div_cycles="8" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="2" div_cycles="9" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="10" min_cycles_is_upper_bound="1" max_cycles="14" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="16" min_cycles_addr_is_upper_bound="1" max_cycles_addr="20" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="20" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="2" div_cycles="4" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="3.99" fusion_occurred="1" uops="2" div_cycles="4" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="2" div_cycles="4" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="3.99" fusion_occurred="1" uops="2" div_cycles="4" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
        <doc TP="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23A" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="16.00" TP_loop="16.00" uops="1">
          <latency start_op="2" target_op="1" min_cycles="21" min_cycles_is_upper_bound="1" max_cycles="23" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="30" cycles_addr_is_upper_bound="1" cycles_addr_index="30" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" ports="1*FP3" uops="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.50" TP_loop="4.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVPD" category="AVX" cpl="3" extension="AVX" iclass="VDIVPD" iform="VDIVPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VDIVPD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VDIVPD_XMM_XMM_XMM.html" summary="Divide Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DIVPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="21.00" latency="21" TP_no_interiteration="21.00" uops="1" div_cycles="21" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="21.00" TP_no_interiteration="21.00" uops="1" div_cycles="21" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="21.00" uops="1" div_cycles="21" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="22" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="22" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="14.00" latency="20" TP_no_interiteration="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="14.00" TP_no_interiteration="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="11" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="14.00" latency="20" TP_no_interiteration="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="14.00" TP_no_interiteration="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="13.81" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="8.02" TP_ports="1.00" TP_unrolled="8.02" div_cycles="9" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="8.00" TP_no_interiteration="8.00" uops="1" div_cycles="8" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="8.00" uops="1" div_cycles="8" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="7.88" uops="1" div_cycles="8" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="1" div_cycles="9" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="10" min_cycles_is_upper_bound="1" max_cycles="14" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="10" min_cycles_is_upper_bound="1" max_cycles="14" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="4.00" uops="1" div_cycles="4" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="3.95" uops="1" div_cycles="4" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="4.00" uops="1" div_cycles="4" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="3.95" uops="1" div_cycles="4" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
        <doc TP="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="16.00" TP_loop="16.00" uops="1">
          <latency start_op="2" target_op="1" min_cycles="21" min_cycles_is_upper_bound="1" max_cycles="23" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="21" min_cycles_is_upper_bound="1" max_cycles="23" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" ports="1*FP3" uops="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="13" TP="4.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="13" TP="6.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.50" TP_loop="4.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="13" TP="6.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVPD" category="AVX" cpl="3" extension="AVX" iclass="VDIVPD" iform="VDIVPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VDIVPD (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VDIVPD_YMM_YMM_M256.html" summary="Divide Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DIVPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="42.00" fusion_occurred="1" latency="69" TP_no_interiteration="42.00" uops="4" div_cycles="42" ports="2*p0+1*p05+1*p23" TP_ports="2.00"/>
        <IACA version="2.2" TP="42.00" fusion_occurred="1" TP_no_interiteration="42.00" uops="4" div_cycles="42" ports="2*p0+1*p05+1*p23" TP_ports="2.00"/>
        <IACA version="2.3" TP="42.00" fusion_occurred="1" uops="4" div_cycles="42" ports="2*p0+1*p05+1*p23" TP_ports="2.00"/>
        <measurement TP_loop="20.16" TP_ports="2.00" TP_unrolled="20.07" available_simple_decoders="0" complex_decoder="1" div_cycles="6" ports="2*p0+1*p05+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency max_cycles="45" max_cycles_is_upper_bound="1" min_cycles="21" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="53" max_cycles_addr_index="53" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="29" min_cycles_addr_index="29" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="28.00" fusion_occurred="1" latency="54" TP_no_interiteration="28.00" uops="4" div_cycles="28" ports="2*p0+1*p05+1*p23" TP_ports="2.00"/>
        <IACA version="2.2" TP="28.00" fusion_occurred="1" TP_no_interiteration="28.00" uops="4" div_cycles="28" ports="2*p0+1*p05+1*p23" TP_ports="2.00"/>
        <IACA version="2.3" TP="28.00" fusion_occurred="1" uops="4" div_cycles="28" ports="2*p0+1*p05+1*p23" TP_ports="2.00"/>
        <measurement TP_loop="16.15" TP_ports="2.00" TP_unrolled="16.00" available_simple_decoders="0" complex_decoder="1" div_cycles="19" ports="2*p0+1*p05+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency max_cycles="35" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="43" max_cycles_addr_index="43" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="27" min_cycles_addr_index="27" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="28.00" fusion_occurred="1" latency="54" TP_no_interiteration="28.00" uops="4" div_cycles="28" ports="2*p0+1*p015+1*p23" TP_ports="2.00"/>
        <IACA version="2.2" TP="28.00" fusion_occurred="1" TP_no_interiteration="28.00" uops="4" div_cycles="28" ports="2*p0+1*p015+1*p23" TP_ports="2.00"/>
        <IACA version="2.3" TP="28.00" fusion_occurred="1" uops="4" div_cycles="28" ports="2*p0+1*p015+1*p23" TP_ports="2.00"/>
        <IACA version="3.0" TP="28.00" fusion_occurred="1" uops="4" div_cycles="28" ports="2*p0+1*p01+1*p23" TP_ports="2.00"/>
        <measurement TP_loop="16.22" TP_ports="2.00" TP_unrolled="16.48" available_simple_decoders="0" complex_decoder="1" div_cycles="9" ports="2*p0+1*p015+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency max_cycles="35" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="42" max_cycles_addr_index="42" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="26" min_cycles_addr_index="26" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="16.00" fusion_occurred="1" TP_no_interiteration="16.00" uops="4" div_cycles="16" ports="2*p0+1*p015+1*p23" TP_ports="2.00"/>
        <IACA version="2.3" TP="16.00" fusion_occurred="1" uops="4" div_cycles="16" ports="2*p0+1*p015+1*p23" TP_ports="2.00"/>
        <IACA version="3.0" TP="16.00" fusion_occurred="1" uops="4" div_cycles="16" ports="2*p0+1*p01+1*p23" TP_ports="2.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="16.33" TP_loop="16.25" uops="4" div_cycles="17" ports="2*p0+1*p015+1*p23" TP_ports="2.00">
          <latency start_op="2" target_op="1" min_cycles="19" min_cycles_is_upper_bound="1" max_cycles="23" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="26" min_cycles_addr_is_upper_bound="1" max_cycles_addr="30" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="26" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="30" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="8.00" fusion_occurred="1" uops="2" div_cycles="8" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="8.00" fusion_occurred="1" uops="2" div_cycles="8" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="21" min_cycles_addr_is_upper_bound="1" max_cycles_addr="22" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="8.00" fusion_occurred="1" uops="2" div_cycles="8" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="8.00" fusion_occurred="1" uops="2" div_cycles="8" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="21" min_cycles_addr_is_upper_bound="1" max_cycles_addr="22" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="22" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="21" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="21" min_cycles_addr_is_upper_bound="1" max_cycles_addr="22" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="21" min_cycles_addr_is_upper_bound="1" max_cycles_addr="22" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="21" min_cycles_addr_is_upper_bound="1" max_cycles_addr="22" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="21" min_cycles_addr_is_upper_bound="1" max_cycles_addr="22" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
        <doc TP="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="21" min_cycles_addr_is_upper_bound="1" max_cycles_addr="22" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="21" min_cycles_addr_is_upper_bound="1" max_cycles_addr="22" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="2" div_cycles="7" ports="1*p0+1*p23A" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="21" min_cycles_addr_is_upper_bound="1" max_cycles_addr="22" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="32.00" TP_loop="32.00" uops="2">
          <latency start_op="2" target_op="1" min_cycles="32" min_cycles_is_upper_bound="1" max_cycles="33" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="30" cycles_addr_is_upper_bound="1" cycles_addr_index="30" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="8.00" TP_unrolled="8.00" uops="2">
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.50" TP_loop="4.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVPD" category="AVX" cpl="3" extension="AVX" iclass="VDIVPD" iform="VDIVPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VDIVPD (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VDIVPD_YMM_YMM_YMM.html" summary="Divide Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DIVPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="42.00" latency="62" TP_no_interiteration="42.00" uops="3" div_cycles="42" ports="2*p0+1*p05" TP_ports="2.00"/>
        <IACA version="2.2" TP="42.00" TP_no_interiteration="42.00" uops="3" div_cycles="42" ports="2*p0+1*p05" TP_ports="2.00"/>
        <IACA version="2.3" TP="42.00" uops="3" div_cycles="42" ports="2*p0+1*p05" TP_ports="2.00"/>
        <measurement TP_loop="20.00" TP_ports="2.00" TP_unrolled="20.00" available_simple_decoders="0" complex_decoder="1" div_cycles="6" ports="2*p0+1*p05" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency max_cycles="45" max_cycles_is_upper_bound="1" min_cycles="21" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="45" max_cycles_is_upper_bound="1" min_cycles="21" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="28.00" latency="47" TP_no_interiteration="28.00" uops="3" div_cycles="28" ports="2*p0+1*p05" TP_ports="2.00"/>
        <IACA version="2.2" TP="28.00" TP_no_interiteration="28.00" uops="3" div_cycles="28" ports="2*p0+1*p05" TP_ports="2.00"/>
        <IACA version="2.3" TP="28.00" uops="3" div_cycles="28" ports="2*p0+1*p05" TP_ports="2.00"/>
        <measurement TP_loop="16.45" TP_ports="2.00" TP_unrolled="16.48" available_simple_decoders="0" complex_decoder="1" div_cycles="19" ports="2*p0+1*p05" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency max_cycles="35" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="35" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="28.00" latency="47" TP_no_interiteration="28.00" uops="3" div_cycles="28" ports="2*p0+1*p015" TP_ports="2.00"/>
        <IACA version="2.2" TP="28.00" TP_no_interiteration="28.00" uops="3" div_cycles="28" ports="2*p0+1*p015" TP_ports="2.00"/>
        <IACA version="2.3" TP="28.00" uops="3" div_cycles="28" ports="2*p0+1*p015" TP_ports="2.00"/>
        <IACA version="3.0" TP="27.12" uops="3" div_cycles="28" ports="2*p0+1*p01" TP_ports="2.00"/>
        <measurement TP_loop="16.12" TP_ports="2.00" TP_unrolled="16.13" available_simple_decoders="0" complex_decoder="1" div_cycles="9" ports="2*p0+1*p015" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency max_cycles="35" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="35" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="16.00" TP_no_interiteration="16.00" uops="3" div_cycles="16" ports="2*p0+1*p015" TP_ports="2.00"/>
        <IACA version="2.3" TP="16.00" uops="3" div_cycles="16" ports="2*p0+1*p015" TP_ports="2.00"/>
        <IACA version="3.0" TP="15.47" uops="3" div_cycles="16" ports="2*p0+1*p01" TP_ports="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="16.00" TP_loop="16.11" uops="3" div_cycles="17" ports="2*p0+1*p015" TP_ports="2.00">
          <latency start_op="2" target_op="1" min_cycles="19" min_cycles_is_upper_bound="1" max_cycles="23" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="19" min_cycles_is_upper_bound="1" max_cycles="23" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="8.00" uops="1" div_cycles="8" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="7.90" uops="1" div_cycles="8" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="8.00" uops="1" div_cycles="8" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="7.90" uops="1" div_cycles="8" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
        <doc TP="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="32.00" TP_loop="32.00" uops="2">
          <latency start_op="2" target_op="1" min_cycles="32" min_cycles_is_upper_bound="1" max_cycles="33" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="32" min_cycles_is_upper_bound="1" max_cycles="33" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="8.00" TP_unrolled="8.00" uops="2">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3" latency="15" TP="9.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="13" TP="6.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.50" TP_loop="4.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="13" TP="6.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVPS" category="AVX" cpl="3" extension="AVX" iclass="VDIVPS" iform="VDIVPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VDIVPS (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VDIVPS_XMM_XMM_M128.html" summary="Divide Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DIVPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="14.00" fusion_occurred="1" latency="20" TP_no_interiteration="14.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="14.00" fusion_occurred="1" TP_no_interiteration="14.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="14.00" fusion_occurred="1" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="20" max_cycles_addr_index="20" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="7.00" fusion_occurred="1" latency="19" TP_no_interiteration="7.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="7.00" fusion_occurred="1" TP_no_interiteration="7.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="7.00" fusion_occurred="1" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="7.00" TP_ports="1.00" TP_unrolled="7.00" div_cycles="11" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="19" max_cycles_addr_index="19" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="7.00" fusion_occurred="1" latency="19" TP_no_interiteration="7.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="7.00" fusion_occurred="1" TP_no_interiteration="7.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="7.00" fusion_occurred="1" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="7.00" fusion_occurred="1" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="7.00" TP_ports="1.00" TP_unrolled="7.00" div_cycles="9" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="19" max_cycles_addr_index="19" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="5.00" fusion_occurred="1" TP_no_interiteration="5.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="5.00" fusion_occurred="1" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="5.00" fusion_occurred="1" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="2" div_cycles="10" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="3.00" fusion_occurred="1" uops="2" div_cycles="3" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="2.94" fusion_occurred="1" uops="2" div_cycles="3" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="3.00" fusion_occurred="1" uops="2" div_cycles="3" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="2.94" fusion_occurred="1" uops="2" div_cycles="3" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
        <doc TP="3.0" latency="17.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23A" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="10.00" TP_loop="10.00" uops="1">
          <latency start_op="2" target_op="1" min_cycles="15" min_cycles_is_upper_bound="1" max_cycles="17" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="24" cycles_addr_is_upper_bound="1" cycles_addr_index="24" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" ports="1*FP3" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="3.50" TP_loop="3.50" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="3.50" TP_loop="3.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVPS" category="AVX" cpl="3" extension="AVX" iclass="VDIVPS" iform="VDIVPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VDIVPS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VDIVPS_XMM_XMM_XMM.html" summary="Divide Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DIVPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="14.00" latency="14" TP_no_interiteration="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="14.00" TP_no_interiteration="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="7.00" latency="13" TP_no_interiteration="7.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="7.00" TP_no_interiteration="7.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="7.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="7.07" TP_ports="1.00" TP_unrolled="7.09" div_cycles="11" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="7.00" latency="13" TP_no_interiteration="7.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="7.00" TP_no_interiteration="7.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="7.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="6.90" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="7.03" TP_ports="1.00" TP_unrolled="7.02" div_cycles="9" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="5.00" TP_no_interiteration="5.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="5.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="4.92" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="1" div_cycles="10" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="3.00" uops="1" div_cycles="3" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="2.96" uops="1" div_cycles="3" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="3.00" uops="1" div_cycles="3" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="2.96" uops="1" div_cycles="3" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
        <doc TP="3.0" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="10.00" TP_loop="10.00" uops="1">
          <latency start_op="2" target_op="1" min_cycles="15" min_cycles_is_upper_bound="1" max_cycles="17" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="15" min_cycles_is_upper_bound="1" max_cycles="17" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" ports="1*FP3" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="10" TP="3.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="3.50" TP_loop="3.50" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="10" TP="5.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="3.50" TP_loop="3.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="10" TP="5.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVPS" category="AVX" cpl="3" extension="AVX" iclass="VDIVPS" iform="VDIVPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VDIVPS (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VDIVPS_YMM_YMM_M256.html" summary="Divide Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DIVPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="28.00" fusion_occurred="1" latency="48" TP_no_interiteration="28.00" uops="4" div_cycles="28" ports="2*p0+1*p05+1*p23" TP_ports="2.00"/>
        <IACA version="2.2" TP="28.00" fusion_occurred="1" TP_no_interiteration="28.00" uops="4" div_cycles="28" ports="2*p0+1*p05+1*p23" TP_ports="2.00"/>
        <IACA version="2.3" TP="28.00" fusion_occurred="1" uops="4" div_cycles="28" ports="2*p0+1*p05+1*p23" TP_ports="2.00"/>
        <measurement TP_loop="20.16" TP_ports="2.00" TP_unrolled="20.08" available_simple_decoders="0" complex_decoder="1" div_cycles="6" ports="2*p0+1*p05+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency max_cycles="29" max_cycles_is_upper_bound="1" min_cycles="21" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="37" max_cycles_addr_index="37" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="29" min_cycles_addr_index="29" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="14.00" fusion_occurred="1" latency="33" TP_no_interiteration="14.00" uops="3" div_cycles="14" ports="2*p0+1*p23" TP_ports="2.00"/>
        <IACA version="2.2" TP="14.00" fusion_occurred="1" TP_no_interiteration="14.00" uops="3" div_cycles="14" ports="2*p0+1*p23" TP_ports="2.00"/>
        <IACA version="2.3" TP="14.00" fusion_occurred="1" uops="3" div_cycles="14" ports="2*p0+1*p23" TP_ports="2.00"/>
        <measurement TP_loop="14.06" TP_ports="2.00" TP_unrolled="14.00" available_simple_decoders="0" complex_decoder="1" div_cycles="18" ports="2*p0+1*p05+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency max_cycles="21" max_cycles_is_upper_bound="1" min_cycles="18" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="29" max_cycles_addr_index="29" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="26" min_cycles_addr_index="26" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="14.00" fusion_occurred="1" latency="33" TP_no_interiteration="14.00" uops="4" div_cycles="14" ports="2*p0+1*p015+1*p23" TP_ports="2.00"/>
        <IACA version="2.2" TP="14.00" fusion_occurred="1" TP_no_interiteration="14.00" uops="4" div_cycles="14" ports="2*p0+1*p015+1*p23" TP_ports="2.00"/>
        <IACA version="2.3" TP="14.00" fusion_occurred="1" uops="4" div_cycles="14" ports="2*p0+1*p015+1*p23" TP_ports="2.00"/>
        <IACA version="3.0" TP="14.00" fusion_occurred="1" uops="4" div_cycles="14" ports="2*p0+1*p01+1*p23" TP_ports="2.00"/>
        <measurement TP_loop="14.18" TP_ports="2.00" TP_unrolled="14.11" available_simple_decoders="0" complex_decoder="1" div_cycles="9" ports="2*p0+1*p015+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency max_cycles="21" max_cycles_is_upper_bound="1" min_cycles="18" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="28" max_cycles_addr_index="28" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="25" min_cycles_addr_index="25" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="10.00" fusion_occurred="1" TP_no_interiteration="10.00" uops="4" div_cycles="10" ports="2*p0+1*p015+1*p23" TP_ports="2.00"/>
        <IACA version="2.3" TP="10.00" fusion_occurred="1" uops="4" div_cycles="10" ports="2*p0+1*p015+1*p23" TP_ports="2.00"/>
        <IACA version="3.0" TP="10.00" fusion_occurred="1" uops="4" div_cycles="10" ports="2*p0+1*p01+1*p23" TP_ports="2.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="10.35" TP_loop="10.45" uops="4" div_cycles="15" ports="2*p0+1*p015+1*p23" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="24" cycles_addr_is_upper_bound="1" cycles_addr_index="24" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="5.00" fusion_occurred="1" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="5.00" fusion_occurred="1" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="2" div_cycles="10" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="5.00" fusion_occurred="1" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="5.00" fusion_occurred="1" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="2" div_cycles="10" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="2" div_cycles="10" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="2" div_cycles="10" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="2" div_cycles="10" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="2" div_cycles="10" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
        <doc TP="5.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="2" div_cycles="10" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="2" div_cycles="10" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="2" div_cycles="5" ports="1*p0+1*p23A" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="20.00" TP_loop="20.00" uops="2">
          <latency start_op="2" target_op="1" min_cycles="20" min_cycles_is_upper_bound="1" max_cycles="24" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="24" cycles_addr_is_upper_bound="1" cycles_addr_index="24" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="2">
          <latency max_cycles="11" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="3.50" TP_loop="3.50" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="3.50" TP_loop="3.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVPS" category="AVX" cpl="3" extension="AVX" iclass="VDIVPS" iform="VDIVPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VDIVPS (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VDIVPS_YMM_YMM_YMM.html" summary="Divide Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DIVPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="28.00" latency="41" TP_no_interiteration="28.00" uops="3" div_cycles="28" ports="2*p0+1*p05" TP_ports="2.00"/>
        <IACA version="2.2" TP="28.00" TP_no_interiteration="28.00" uops="3" div_cycles="28" ports="2*p0+1*p05" TP_ports="2.00"/>
        <IACA version="2.3" TP="28.00" uops="3" div_cycles="28" ports="2*p0+1*p05" TP_ports="2.00"/>
        <measurement TP_loop="20.00" TP_ports="2.00" TP_unrolled="20.00" available_simple_decoders="0" complex_decoder="1" div_cycles="6" ports="2*p0+1*p05" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency max_cycles="29" max_cycles_is_upper_bound="1" min_cycles="21" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="29" max_cycles_is_upper_bound="1" min_cycles="21" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="14.00" latency="26" TP_no_interiteration="14.00" uops="2" div_cycles="14" ports="2*p0" TP_ports="2.00"/>
        <IACA version="2.2" TP="14.00" TP_no_interiteration="14.00" uops="2" div_cycles="14" ports="2*p0" TP_ports="2.00"/>
        <IACA version="2.3" TP="14.00" uops="2" div_cycles="14" ports="2*p0" TP_ports="2.00"/>
        <measurement TP_loop="14.06" TP_ports="2.00" TP_unrolled="14.15" available_simple_decoders="0" complex_decoder="1" div_cycles="18" ports="2*p0+1*p05" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency max_cycles="21" max_cycles_is_upper_bound="1" min_cycles="18" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="21" max_cycles_is_upper_bound="1" min_cycles="18" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="14.00" latency="26" TP_no_interiteration="14.00" uops="3" div_cycles="14" ports="2*p0+1*p015" TP_ports="2.00"/>
        <IACA version="2.2" TP="14.00" TP_no_interiteration="14.00" uops="3" div_cycles="14" ports="2*p0+1*p015" TP_ports="2.00"/>
        <IACA version="2.3" TP="14.00" uops="3" div_cycles="14" ports="2*p0+1*p015" TP_ports="2.00"/>
        <IACA version="3.0" TP="13.56" uops="3" div_cycles="14" ports="2*p0+1*p01" TP_ports="2.00"/>
        <measurement TP_loop="14.00" TP_ports="2.00" TP_unrolled="14.11" available_simple_decoders="0" complex_decoder="1" div_cycles="9" ports="2*p0+1*p015" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency max_cycles="21" max_cycles_is_upper_bound="1" min_cycles="18" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="21" max_cycles_is_upper_bound="1" min_cycles="18" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="10.00" TP_no_interiteration="10.00" uops="3" div_cycles="10" ports="2*p0+1*p015" TP_ports="2.00"/>
        <IACA version="2.3" TP="10.00" uops="3" div_cycles="10" ports="2*p0+1*p015" TP_ports="2.00"/>
        <IACA version="3.0" TP="9.67" uops="3" div_cycles="10" ports="2*p0+1*p01" TP_ports="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="10.00" TP_loop="10.06" uops="3" div_cycles="15" ports="2*p0+1*p015" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="17" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="5.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="4.94" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="1" div_cycles="10" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="5.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="4.94" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="1" div_cycles="10" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="1" div_cycles="10" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="1" div_cycles="10" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="1" div_cycles="10" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="1" div_cycles="10" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
        <doc TP="5.0" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="1" div_cycles="10" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="1" div_cycles="10" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="20.00" TP_loop="20.00" uops="2">
          <latency start_op="2" target_op="1" min_cycles="20" min_cycles_is_upper_bound="1" max_cycles="24" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="20" min_cycles_is_upper_bound="1" max_cycles="24" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP3" latency="12" TP="6.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="3.50" TP_loop="3.50" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="10" TP="5.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="3.50" TP_loop="3.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="10" TP="5.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVSD" category="AVX" cpl="3" extension="AVX" iclass="VDIVSD" iform="VDIVSD_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VDIVSD (XMM, XMM, M64)" vex="1" url="uops.info/html-instr/VDIVSD_XMM_XMM_M64.html" summary="Divide Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/DIVSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="21.00" fusion_occurred="1" latency="27" TP_no_interiteration="21.00" uops="2" div_cycles="21" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="21.00" fusion_occurred="1" TP_no_interiteration="21.00" uops="2" div_cycles="21" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="21.00" fusion_occurred="1" uops="2" div_cycles="21" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="22" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="28" max_cycles_addr_index="28" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="14.00" fusion_occurred="1" latency="26" TP_no_interiteration="14.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="14.00" fusion_occurred="1" TP_no_interiteration="14.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="14.00" fusion_occurred="1" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="11" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="26" max_cycles_addr_index="26" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="14.00" fusion_occurred="1" latency="26" TP_no_interiteration="14.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="14.00" fusion_occurred="1" TP_no_interiteration="14.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="14.00" fusion_occurred="1" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="14.00" fusion_occurred="1" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="9" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="26" max_cycles_addr_index="26" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="4.00" fusion_occurred="1" TP_no_interiteration="4.00" uops="2" div_cycles="4" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="2" div_cycles="4" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="4.00" fusion_occurred="1" uops="2" div_cycles="4" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="9" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="10" min_cycles_is_upper_bound="1" max_cycles="14" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="16" min_cycles_addr_is_upper_bound="1" max_cycles_addr="20" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="20" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="2" div_cycles="4" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="4.00" fusion_occurred="1" uops="2" div_cycles="4" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="2" div_cycles="4" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="4.00" fusion_occurred="1" uops="2" div_cycles="4" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
        <doc TP="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="2" div_cycles="7" ports="1*p0+1*p23A" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles_addr="20" min_cycles_addr_is_upper_bound="1" max_cycles_addr="21" max_cycles_addr_is_upper_bound="1" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="1">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="22" cycles_addr_is_upper_bound="1" cycles_addr_index="22" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" ports="1*FP3" uops="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.50" TP_loop="4.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVSD" category="AVX" cpl="3" extension="AVX" iclass="VDIVSD" iform="VDIVSD_XMMdq_XMMdq_XMMq" isa-set="AVX" mxcsr="1" string="VDIVSD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VDIVSD_XMM_XMM_XMM.html" summary="Divide Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/DIVSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="21.00" latency="21" TP_no_interiteration="21.00" uops="1" div_cycles="21" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="21.00" TP_no_interiteration="21.00" uops="1" div_cycles="21" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="21.00" uops="1" div_cycles="21" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="22" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="22" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="14.00" latency="20" TP_no_interiteration="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="14.00" TP_no_interiteration="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="11" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="14.00" latency="20" TP_no_interiteration="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="14.00" TP_no_interiteration="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="13.81" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="8.02" TP_ports="1.00" TP_unrolled="8.02" div_cycles="9" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="4.00" TP_no_interiteration="4.00" uops="1" div_cycles="4" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="4.00" uops="1" div_cycles="4" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="3.94" uops="1" div_cycles="4" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="9" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="10" min_cycles_is_upper_bound="1" max_cycles="14" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="10" min_cycles_is_upper_bound="1" max_cycles="14" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="4.00" uops="1" div_cycles="4" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="3.95" uops="1" div_cycles="4" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="4.00" uops="1" div_cycles="4" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="3.95" uops="1" div_cycles="4" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
        <doc TP="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.00" TP_loop="4.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="8.00" TP_loop="8.00" uops="1">
          <latency start_op="2" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="13" min_cycles_is_upper_bound="1" max_cycles="15" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" ports="1*FP3" uops="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="13" TP="4.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="13" TP="6.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.50" TP_loop="4.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="13" TP="6.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="13" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVSS" category="AVX" cpl="3" extension="AVX" iclass="VDIVSS" iform="VDIVSS_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VDIVSS (XMM, XMM, M32)" vex="1" url="uops.info/html-instr/VDIVSS_XMM_XMM_M32.html" summary="Divide Scalar Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DIVSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="14.00" fusion_occurred="1" latency="20" TP_no_interiteration="14.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="14.00" fusion_occurred="1" TP_no_interiteration="14.00" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="14.00" fusion_occurred="1" uops="2" div_cycles="14" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="20" max_cycles_addr_index="20" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="7.00" fusion_occurred="1" latency="19" TP_no_interiteration="7.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="7.00" fusion_occurred="1" TP_no_interiteration="7.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="7.00" fusion_occurred="1" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="7.00" TP_ports="1.00" TP_unrolled="7.00" div_cycles="11" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="19" max_cycles_addr_index="19" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="7.00" fusion_occurred="1" latency="19" TP_no_interiteration="7.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="7.00" fusion_occurred="1" TP_no_interiteration="7.00" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="7.00" fusion_occurred="1" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="7.00" fusion_occurred="1" uops="2" div_cycles="7" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="7.00" TP_ports="1.00" TP_unrolled="7.00" div_cycles="9" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="19" max_cycles_addr_index="19" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="3.00" fusion_occurred="1" TP_no_interiteration="3.00" uops="2" div_cycles="3" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="3.00" fusion_occurred="1" uops="2" div_cycles="3" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="2.93" fusion_occurred="1" uops="2" div_cycles="3" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="2.50" TP_loop="2.50" uops="2" div_cycles="10" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="3.00" fusion_occurred="1" uops="2" div_cycles="3" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="2.96" fusion_occurred="1" uops="2" div_cycles="3" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="3.00" fusion_occurred="1" uops="2" div_cycles="3" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="3.0" TP="2.96" fusion_occurred="1" uops="2" div_cycles="3" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
        <doc TP="3.0" latency="17.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="2" div_cycles="5" ports="1*p0+1*p23A" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="1">
          <latency start_op="2" target_op="1" min_cycles="10" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" ports="1*FP3" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="3.50" TP_loop="3.50" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="3.50" TP_loop="3.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVSS" category="AVX" cpl="3" extension="AVX" iclass="VDIVSS" iform="VDIVSS_XMMdq_XMMdq_XMMd" isa-set="AVX" mxcsr="1" string="VDIVSS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VDIVSS_XMM_XMM_XMM.html" summary="Divide Scalar Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DIVSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="14.00" latency="14" TP_no_interiteration="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="14.00" TP_no_interiteration="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="14.00" uops="1" div_cycles="14" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="7.00" latency="13" TP_no_interiteration="7.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="7.00" TP_no_interiteration="7.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="7.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="7.07" TP_ports="1.00" TP_unrolled="7.09" div_cycles="11" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="7.00" latency="13" TP_no_interiteration="7.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="7.00" TP_no_interiteration="7.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="7.00" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="6.90" uops="1" div_cycles="7" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="7.03" TP_ports="1.00" TP_unrolled="7.03" div_cycles="9" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="3.00" TP_no_interiteration="3.00" uops="1" div_cycles="3" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="3.00" uops="1" div_cycles="3" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="2.95" uops="1" div_cycles="3" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="2.50" TP_loop="2.57" uops="1" div_cycles="10" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="3.00" uops="1" div_cycles="3" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="2.96" uops="1" div_cycles="3" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="3.00" uops="1" div_cycles="3" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="2.96" uops="1" div_cycles="3" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
        <doc TP="3.0" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="3.00" TP_loop="3.00" uops="1" div_cycles="5" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="11" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="5.00" TP_loop="5.00" uops="1">
          <latency start_op="2" target_op="1" min_cycles="10" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" min_cycles="10" min_cycles_is_upper_bound="1" max_cycles="12" max_cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" ports="1*FP3" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="10" TP="3.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="3.50" TP_loop="3.50" uops="1" ports="1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="10" TP="5.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="3.50" TP_loop="3.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP3" latency="10" TP="5.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDPPD" category="AVX" cpl="3" extension="AVX" iclass="VDPPD" iform="VDPPD_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" mxcsr="1" string="VDPPD (XMM, XMM, M128, I8)" vex="1" url="uops.info/html-instr/VDPPD_XMM_XMM_M128_I8.html" summary="Dot Product of Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DPPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="15" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="15" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="15" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p1+1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="4" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+1*p1+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p1+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="4" ports="1*p0+1*p1+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="1*p0+1*p1+1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="2*p015+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="2*p015+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="4" ports="2*p01+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="2*p01+1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="2*p01+1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="2*p01+1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="4" ports="2*p01+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="2*p01+1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="1" complex_decoder="1" ports="2*p01+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="2*p01+1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="2*p01+1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="2*p01+1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.06" TP_loop="1.06" uops="4" ports="2*p01+1*p15+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc latency="15.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.05" TP_loop="1.06" uops="4" ports="2*p01+1*p15+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.06" TP_loop="1.06" uops="4" ports="2*p01+1*p15+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.04" TP_loop="1.06" uops="4" ports="2*p01+1*p15+1*p23A" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="4" complex_decoder="1" TP_unrolled="6.00" TP_loop="5.20" uops="5">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="5">
          <latency cycles="10" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="5">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="16" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="5" ports="1*FP0123+1*FP1+1*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="5" ports="1*FP01+1*FP123+1*FP23" TP_ports="0.75">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDPPD" category="AVX" cpl="3" extension="AVX" iclass="VDPPD" iform="VDPPD_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" mxcsr="1" string="VDPPD (XMM, XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VDPPD_XMM_XMM_XMM_I8.html" summary="Dot Product of Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DPPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p1+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="9" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.96" uops="3" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+1*p1+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.96" uops="3" ports="1*p0+1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p1+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="3" ports="2*p015+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.96" uops="3" ports="2*p01" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="3" ports="2*p01+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.96" uops="3" ports="2*p01" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p01+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.06" TP_loop="1.06" uops="3" ports="2*p01+1*p15" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
        <doc latency="9.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.06" TP_loop="1.06" uops="3" ports="2*p01+1*p15" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.05" TP_loop="1.06" uops="3" ports="2*p01+1*p15" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.04" TP_loop="1.05" uops="3" ports="2*p01+1*p15" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="3.57" TP_loop="3.20" uops="4">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles="13"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="3">
          <latency cycles="10" start_op="2" target_op="1"/>
          <latency cycles="10" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode" latency="10" TP="3.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="3">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="1*FP0123+1*FP1+1*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="1*FP01+1*FP123+1*FP23" TP_ports="0.75">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDPPS" category="AVX" cpl="3" extension="AVX" iclass="VDPPS" iform="VDPPS_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" mxcsr="1" string="VDPPS (XMM, XMM, M128, I8)" vex="1" url="uops.info/html-instr/VDPPS_XMM_XMM_M128_I8.html" summary="Dot Product of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DPPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p23+2*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p23+2*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="5" ports="2*p0+1*p1+1*p23+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="5" ports_indexed="2*p0+1*p1+1*p23+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="5" ports="2*p0+1*p1+1*p23+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="5" ports_indexed="2*p0+1*p1+1*p23+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" uops="5" ports="2*p0+1*p1+1*p23" TP_ports="2.00"/>
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p06+1*p1+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="5" ports="2*p0+1*p1+1*p23+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="5" ports_indexed="2*p0+1*p1+1*p23+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="5" ports="2*p0+1*p1+1*p23+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="5" ports_indexed="2*p0+1*p1+1*p23+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" uops="5" ports="2*p0+1*p1+1*p23" TP_ports="2.00"/>
        <measurement uops_retire_slots="6" uops_MITE="4" uops_MS="2" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="2*p0+1*p06+1*p1+1*p23+1*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="12"/>
          <latency start_op="3" target_op="1" cycles_addr="18" cycles_addr_is_upper_bound="1" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.33" fusion_occurred="1" uops="5" ports="3*p015+1*p23+1*p5" TP_ports="1.33" TP_indexed="1.33" uops_indexed="5" ports_indexed="3*p015+1*p23+1*p5" TP_ports_indexed="1.33"/>
        <IACA version="3.0" TP="1.50" uops="5" ports="3*p01+1*p23" TP_ports="1.50"/>
        <measurement uops_retire_slots="6" uops_MITE="4" uops_MS="2" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p23+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles_addr="20" cycles_addr_is_upper_bound="1" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.50" fusion_occurred="1" uops="5" ports="3*p01+1*p23+1*p5" TP_ports="1.50" TP_indexed="1.50" uops_indexed="5" ports_indexed="3*p01+1*p23+1*p5" TP_ports_indexed="1.50"/>
        <IACA version="3.0" TP="1.50" uops="5" ports="3*p01+1*p23" TP_ports="1.50"/>
        <measurement uops_retire_slots="6" uops_MITE="4" uops_MS="2" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p23+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles_addr="20" cycles_addr_is_upper_bound="1" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="1.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="6" uops_MITE="4" uops_MS="2" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p23+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles_addr="20" cycles_addr_is_upper_bound="1" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="6" uops_MITE="4" uops_MS="2" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p23+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles_addr="20" cycles_addr_is_upper_bound="1" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="6" uops_MITE="4" uops_MS="2" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p23+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles_addr="20" cycles_addr_is_upper_bound="1" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="7" uops_MITE="4" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="7" ports="3*p01+1*p06+1*p15+1*p23+1*p5" TP_ports="1.67">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="7" uops_MITE="4" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="7" ports="3*p01+1*p06+1*p15+1*p23+1*p5" TP_ports="1.67">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="7" uops_MITE="4" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="7" ports="3*p01+1*p06+1*p15+1*p23+1*p5" TP_ports="1.67">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="7" uops_MITE="4" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="3.96" TP_loop="4.00" uops="7" ports="2*p01+1*p06+2*p15+1*p23A+1*p5" TP_ports="1.67" uops_retire_slots_indexed="7" uops_MITE_indexed="4" uops_MS_indexed="3" complex_decoder_indexed="1" available_simple_decoders_indexed="0" TP_unrolled_indexed="4.00" TP_loop_indexed="4.00" uops_indexed="7" ports_indexed="2*p01+1*p05+1*p06+3*p15+1*p23A" TP_ports_indexed="2.00">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="8" complex_decoder="1" TP_unrolled="9.98" TP_loop="9.73" uops="9">
          <latency start_op="2" target_op="1" cycles="21"/>
          <latency start_op="3" target_op="1" cycles_addr="27" cycles_addr_is_upper_bound="1" cycles_addr_index="27" cycles_addr_index_is_upper_bound="1" cycles_mem="27" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="5.00" TP_unrolled="5.00" uops="10">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles_addr="23" cycles_addr_index="23" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="22" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="10">
          <latency start_op="2" target_op="1" cycles="15"/>
          <latency start_op="3" target_op="1" cycles_addr="23" cycles_addr_is_upper_bound="1" cycles_addr_index="23" cycles_addr_index_is_upper_bound="1" cycles_mem="22" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="10" ports="1*FP0123+2*FP1+2*FP23" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="15"/>
          <latency start_op="3" target_op="1" cycles_addr="23" cycles_addr_is_upper_bound="1" cycles_addr_index="23" cycles_addr_index_is_upper_bound="1" cycles_mem="23" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="5.00" TP_loop="5.00" uops="10" ports="1*FP01+1*FP123+3*FP23" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="11"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDPPS" category="AVX" cpl="3" extension="AVX" iclass="VDPPS" iform="VDPPS_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" mxcsr="1" string="VDPPS (XMM, XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VDPPS_XMM_XMM_XMM_I8.html" summary="Dot Product of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DPPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p0+2*p1+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="4" ports="1*p0+2*p1+1*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="4" ports="1*p0+2*p1+1*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.03" TP_ports="2.00" TP_unrolled="2.03" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles="12" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p0+2*p1+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="4" ports="1*p0+2*p1+1*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="4" ports="1*p0+2*p1+1*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.03" TP_ports="2.00" TP_unrolled="2.02" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles="12" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="14" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p1+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p1+1*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="4" ports="2*p0+1*p1+1*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.93" uops="4" ports="2*p0+1*p1" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p1+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles="14" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p1+1*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="4" ports="2*p0+1*p1+1*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.93" uops="4" ports="2*p0+1*p1" TP_ports="2.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="2*p0+1*p1+1*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="12"/>
          <latency start_op="3" target_op="1" cycles="12"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.34" uops="4" ports="3*p015+1*p5" TP_ports="1.33"/>
        <IACA version="3.0" TP="1.42" uops="4" ports="3*p01" TP_ports="1.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.50" TP_loop="1.50" uops="4" ports="3*p01+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles="13"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.50" uops="4" ports="3*p01+1*p5" TP_ports="1.50"/>
        <IACA version="3.0" TP="1.42" uops="4" ports="3*p01" TP_ports="1.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.57" TP_loop="1.54" uops="4" ports="3*p01+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles="13"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="1" complex_decoder="1" ports="3*p01+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="13" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.50" TP_loop="1.50" uops="4" ports="3*p01+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles="13"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.50" TP_loop="1.50" uops="4" ports="3*p01+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles="13"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.56" TP_loop="1.54" uops="4" ports="3*p01+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles="13"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="6" uops_MITE="3" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="6" ports="3*p01+1*p06+1*p15+1*p5" TP_ports="1.67">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles="14"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="6" uops_MITE="3" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p15+1*p5" TP_ports="1.67">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles="14"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="6" uops_MITE="3" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p15+1*p5" TP_ports="1.67">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles="14"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="6" uops_MITE="3" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="3.95" TP_loop="4.00" uops="6" ports="2*p01+1*p06+2*p15+1*p5" TP_ports="1.67">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles="14"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="7" complex_decoder="1" TP_unrolled="5.58" TP_loop="5.60" uops="8">
          <latency start_op="2" target_op="1" cycles="21"/>
          <latency start_op="3" target_op="1" cycles="21"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="8">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles="15" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode" latency="15" TP="4.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="8">
          <latency start_op="2" target_op="1" cycles="15"/>
          <latency start_op="3" target_op="1" cycles="15"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="1*FP0123+2*FP1+2*FP23" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="15"/>
          <latency start_op="3" target_op="1" cycles="15"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="2*FP1+3*FP23" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="11"/>
          <latency start_op="3" target_op="1" cycles="11"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDPPS" category="AVX" cpl="3" extension="AVX" iclass="VDPPS" iform="VDPPS_YMMqq_YMMqq_MEMqq_IMMb" isa-set="AVX" mxcsr="1" string="VDPPS (YMM, YMM, M256, I8)" vex="1" url="uops.info/html-instr/VDPPS_YMM_YMM_M256_I8.html" summary="Dot Product of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DPPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p23+2*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p23+2*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="5" ports="2*p0+1*p1+1*p23+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="5" ports_indexed="2*p0+1*p1+1*p23+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="5" ports="2*p0+1*p1+1*p23+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="5" ports_indexed="2*p0+1*p1+1*p23+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" uops="5" ports="2*p0+1*p1+1*p23" TP_ports="2.00"/>
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p06+1*p1+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="20" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="5" ports="2*p0+1*p1+1*p23+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="5" ports_indexed="2*p0+1*p1+1*p23+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="5" ports="2*p0+1*p1+1*p23+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="5" ports_indexed="2*p0+1*p1+1*p23+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" uops="5" ports="2*p0+1*p1+1*p23" TP_ports="2.00"/>
        <measurement uops_retire_slots="6" uops_MITE="4" uops_MS="2" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="2*p0+1*p06+1*p1+1*p23+1*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="12"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.33" fusion_occurred="1" uops="5" ports="3*p015+1*p23+1*p5" TP_ports="1.33" TP_indexed="1.33" uops_indexed="5" ports_indexed="3*p015+1*p23+1*p5" TP_ports_indexed="1.33"/>
        <IACA version="3.0" TP="1.50" uops="5" ports="3*p01+1*p23" TP_ports="1.50"/>
        <measurement uops_retire_slots="6" uops_MITE="4" uops_MS="2" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p23+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.50" fusion_occurred="1" uops="5" ports="3*p01+1*p23+1*p5" TP_ports="1.50" TP_indexed="1.50" uops_indexed="5" ports_indexed="3*p01+1*p23+1*p5" TP_ports_indexed="1.50"/>
        <IACA version="3.0" TP="1.50" uops="5" ports="3*p01+1*p23" TP_ports="1.50"/>
        <measurement uops_retire_slots="6" uops_MITE="4" uops_MS="2" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p23+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="1.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="6" uops_MITE="4" uops_MS="2" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p23+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="6" uops_MITE="4" uops_MS="2" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p23+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="6" uops_MITE="4" uops_MS="2" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p23+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles_addr="21" cycles_addr_is_upper_bound="1" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="7" uops_MITE="4" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="7" ports="3*p01+1*p06+1*p15+1*p23+1*p5" TP_ports="1.67">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles_addr="22" cycles_addr_is_upper_bound="1" cycles_addr_index="22" cycles_addr_index_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="7" uops_MITE="4" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="7" ports="3*p01+1*p06+1*p15+1*p23+1*p5" TP_ports="1.67">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles_addr="22" cycles_addr_is_upper_bound="1" cycles_addr_index="22" cycles_addr_index_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="7" uops_MITE="4" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="7" ports="3*p01+1*p06+1*p15+1*p23+1*p5" TP_ports="1.67">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles_addr="22" cycles_addr_is_upper_bound="1" cycles_addr_index="22" cycles_addr_index_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="7" uops_MITE="4" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="3.96" TP_loop="4.00" uops="7" ports="2*p01+1*p06+2*p15+1*p23A+1*p5" TP_ports="1.67" uops_retire_slots_indexed="7" uops_MITE_indexed="4" uops_MS_indexed="3" complex_decoder_indexed="1" available_simple_decoders_indexed="0" TP_unrolled_indexed="3.96" TP_loop_indexed="4.00" uops_indexed="7" ports_indexed="2*p01+1*p05+1*p06+3*p15+1*p23A" TP_ports_indexed="2.00">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles_addr="22" cycles_addr_is_upper_bound="1" cycles_addr_index="22" cycles_addr_index_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="16" complex_decoder="1" TP_unrolled="10.00" TP_loop="9.67" uops="17">
          <latency start_op="2" target_op="1" cycles="24"/>
          <latency start_op="3" target_op="1" cycles_addr="29" cycles_addr_is_upper_bound="1" cycles_addr_index="29" cycles_addr_index_is_upper_bound="1" cycles_mem="30" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="5.00" TP_unrolled="5.00" uops="14">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles_addr="24" cycles_addr_index="24" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="23" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="8">
          <latency start_op="2" target_op="1" cycles="15"/>
          <latency start_op="3" target_op="1" cycles_addr="23" cycles_addr_is_upper_bound="1" cycles_addr_index="23" cycles_addr_index_is_upper_bound="1" cycles_mem="23" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="1*FP0123+1*FP1+1*FP12+1*FP23+1*FP3" TP_ports="1.33">
          <latency start_op="2" target_op="1" cycles="15"/>
          <latency start_op="3" target_op="1" cycles_addr="23" cycles_addr_is_upper_bound="1" cycles_addr_index="23" cycles_addr_index_is_upper_bound="1" cycles_mem="24" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="1*FP0123+1*FP1+3*FP23" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="11"/>
          <latency start_op="3" target_op="1" cycles_addr="19" cycles_addr_is_upper_bound="1" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_mem="20" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDPPS" category="AVX" cpl="3" extension="AVX" iclass="VDPPS" iform="VDPPS_YMMqq_YMMqq_YMMqq_IMMb" isa-set="AVX" mxcsr="1" string="VDPPS (YMM, YMM, YMM, I8)" vex="1" url="uops.info/html-instr/VDPPS_YMM_YMM_YMM_I8.html" summary="Dot Product of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/DPPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p0+2*p1+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="4" ports="1*p0+2*p1+1*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="4" ports="1*p0+2*p1+1*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.03" TP_ports="2.00" TP_unrolled="2.03" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles="12" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p0+2*p1+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="4" ports="1*p0+2*p1+1*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="4" ports="1*p0+2*p1+1*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.03" TP_ports="2.00" TP_unrolled="2.02" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles="12" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="14" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p1+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p1+1*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="4" ports="2*p0+1*p1+1*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.93" uops="4" ports="2*p0+1*p1" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p1+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles="14" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p1+1*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="4" ports="2*p0+1*p1+1*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.93" uops="4" ports="2*p0+1*p1" TP_ports="2.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="2*p0+1*p1+1*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="12"/>
          <latency start_op="3" target_op="1" cycles="12"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.34" uops="4" ports="3*p015+1*p5" TP_ports="1.33"/>
        <IACA version="3.0" TP="1.42" uops="4" ports="3*p01" TP_ports="1.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.50" TP_loop="1.50" uops="4" ports="3*p01+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles="13"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.50" uops="4" ports="3*p01+1*p5" TP_ports="1.50"/>
        <IACA version="3.0" TP="1.42" uops="4" ports="3*p01" TP_ports="1.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.57" TP_loop="1.54" uops="4" ports="3*p01+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles="13"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="1" complex_decoder="1" ports="3*p01+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="13" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.50" TP_loop="1.50" uops="4" ports="3*p01+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles="13"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.50" TP_loop="1.50" uops="4" ports="3*p01+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles="13"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="1.60" TP_loop="1.54" uops="4" ports="3*p01+1*p5" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="13"/>
          <latency start_op="3" target_op="1" cycles="13"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="6" uops_MITE="3" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p15+1*p5" TP_ports="1.67">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles="14"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="6" uops_MITE="3" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p15+1*p5" TP_ports="1.67">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles="14"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="6" uops_MITE="3" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="6" ports="3*p01+1*p06+1*p15+1*p5" TP_ports="1.67">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles="14"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="6" uops_MITE="3" uops_MS="3" complex_decoder="1" available_simple_decoders="0" TP_unrolled="3.94" TP_loop="4.00" uops="6" ports="2*p01+1*p06+2*p15+1*p5" TP_ports="1.67">
          <latency start_op="2" target_op="1" cycles="14"/>
          <latency start_op="3" target_op="1" cycles="14"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="14" complex_decoder="1" TP_unrolled="6.70" TP_loop="6.67" uops="15">
          <latency start_op="2" target_op="1" cycles="25"/>
          <latency start_op="3" target_op="1" cycles="24"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="5.00" TP_unrolled="5.00" uops="13">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles="15" cycles_same_reg="16" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode" latency="16" TP="5.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="7">
          <latency start_op="2" target_op="1" cycles="15"/>
          <latency start_op="3" target_op="1" cycles="15"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="7" ports="1*FP0123+1*FP1+1*FP12+1*FP23+1*FP3" TP_ports="1.33">
          <latency start_op="2" target_op="1" cycles="15"/>
          <latency start_op="3" target_op="1" cycles="15"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="7" ports="1*FP01+1*FP123+3*FP23" TP_ports="1.50">
          <latency start_op="2" target_op="1" cycles="11"/>
          <latency start_op="3" target_op="1" cycles="11"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VEXTRACTF128" category="AVX" cpl="3" extension="AVX" iclass="VEXTRACTF128" iform="VEXTRACTF128_MEMdq_YMMdq_IMMb" isa-set="AVX" string="VEXTRACTF128 (M128, YMM, I8)" vex="1" url="uops.info/html-instr/VEXTRACTF128_M128_YMM_I8.html" summary="Extract Packed Floating-Point Values" url-ref="felixcloutier.com/x86/VEXTRACTF128:VEXTRACTF32x4:VEXTRACTF64x2:VEXTRACTF32x8:VEXTRACTF64x4.html">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p4" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p4" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p4" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p4" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP01+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP0123+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VEXTRACTF128" category="AVX" cpl="3" extension="AVX" iclass="VEXTRACTF128" iform="VEXTRACTF128_XMMdq_YMMdq_IMMb" isa-set="AVX" string="VEXTRACTF128 (XMM, YMM, I8)" vex="1" url="uops.info/html-instr/VEXTRACTF128_XMM_YMM_I8.html" summary="Extract Packed Floating-Point Values" url-ref="felixcloutier.com/x86/VEXTRACTF128:VEXTRACTF32x4:VEXTRACTF64x2:VEXTRACTF32x8:VEXTRACTF64x4.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.37" ports="1*FP013" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="2" ports="FP0, FP4" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.37" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VEXTRACTPS" category="AVX" cpl="3" extension="AVX" iclass="VEXTRACTPS" iform="VEXTRACTPS_MEMd_XMMdq_IMMb" isa-set="AVX" string="VEXTRACTPS (M32, XMM, I8)" vex="1" url="uops.info/html-instr/VEXTRACTPS_M32_XMM_I8.html" summary="Extract Packed Floating-Point Values" url-ref="felixcloutier.com/x86/EXTRACTPS.html">
      <operand idx="1" memory-prefix="dword ptr" name="MEM0" type="mem" w="1" width="32" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="3" ports="1*p23+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p23+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p23+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p4+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="3" ports="1*p23+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p23+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p23+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p23+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.88" fusion_occurred="1" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" TP_indexed="0.80" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.88" fusion_occurred="1" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" TP_indexed="0.80" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="6" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.85" fusion_occurred="1" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" TP_indexed="0.76" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.85" fusion_occurred="1" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" TP_indexed="0.76" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p237+1*p4+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p23+1*p4+1*p5" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p49+1*p5+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p49+1*p5+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p49+1*p5+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p49+1*p5+1*p78" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="1*p49+1*p78" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP12+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP123+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VEXTRACTPS" category="AVX" cpl="3" extension="AVX" iclass="VEXTRACTPS" iform="VEXTRACTPS_GPR32_XMMdq_IMMb" isa-set="AVX" string="VEXTRACTPS (R32, XMM, I8)" vex="1" url="uops.info/html-instr/VEXTRACTPS_R32_XMM_I8.html" summary="Extract Packed Floating-Point Values" url-ref="felixcloutier.com/x86/EXTRACTPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.96" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.96" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="2" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.96" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.96" uops="2" ports="1*p0+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="6" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP12+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="6" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP123+1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="6" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHADDPD" category="AVX" cpl="3" extension="AVX" iclass="VHADDPD" iform="VHADDPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VHADDPD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VHADDPD_XMM_XMM_M128.html" summary="Packed Double-FP Horizontal Add" url-ref="felixcloutier.com/x86/HADDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="11" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="11" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="11" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="2.0" latency="12.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p15+1*p23A+2*p5" TP_ports="2.00" uops_retire_slots_indexed="4" uops_MITE_indexed="4" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="2.00" TP_loop_indexed="2.00" uops_indexed="4" ports_indexed="3*p15+1*p23A" TP_ports_indexed="1.50">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="4" complex_decoder="1" TP_unrolled="4.73" TP_loop="4.62" uops="5">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+1*FP12+1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP123+2*FP23" TP_ports="1.00" TP_unrolled_indexed="2.00" TP_loop_indexed="2.00" uops_indexed="4" ports_indexed="1*FP1+2*FP23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHADDPD" category="AVX" cpl="3" extension="AVX" iclass="VHADDPD" iform="VHADDPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VHADDPD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VHADDPD_XMM_XMM_XMM.html" summary="Packed Double-FP Horizontal Add" url-ref="felixcloutier.com/x86/HADDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc TP="2.0" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p15+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="3.58" TP_loop="3.16" uops="4">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode" latency="6" TP="2.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+1*FP12+1*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP123+2*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHADDPD" category="AVX" cpl="3" extension="AVX" iclass="VHADDPD" iform="VHADDPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VHADDPD (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VHADDPD_YMM_YMM_M256.html" summary="Packed Double-FP Horizontal Add" url-ref="felixcloutier.com/x86/HADDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="2.0" latency="13.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p15+1*p23A+2*p5" TP_ports="2.00" uops_retire_slots_indexed="4" uops_MITE_indexed="4" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="2.00" TP_loop_indexed="2.00" uops_indexed="4" ports_indexed="3*p15+1*p23A" TP_ports_indexed="1.50">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="8" complex_decoder="1" TP_unrolled="4.73" TP_loop="4.62" uops="9">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="10">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+1*FP12+1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+2*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHADDPD" category="AVX" cpl="3" extension="AVX" iclass="VHADDPD" iform="VHADDPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VHADDPD (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VHADDPD_YMM_YMM_YMM.html" summary="Packed Double-FP Horizontal Add" url-ref="felixcloutier.com/x86/HADDPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc TP="2.0" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p15+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="6" complex_decoder="1" TP_unrolled="3.63" TP_loop="3.16" uops="7">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="8">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode" latency="6" TP="3.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*FP1+1*FP12+1*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*FP123+2*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHADDPS" category="AVX" cpl="3" extension="AVX" iclass="VHADDPS" iform="VHADDPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VHADDPS (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VHADDPS_XMM_XMM_M128.html" summary="Packed Single-FP Horizontal Add" url-ref="felixcloutier.com/x86/HADDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="11" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="11" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="11" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc latency="12.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p15+1*p23A+2*p5" TP_ports="2.00" uops_retire_slots_indexed="4" uops_MITE_indexed="4" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="2.00" TP_loop_indexed="2.00" uops_indexed="4" ports_indexed="3*p15+1*p23A" TP_ports_indexed="1.50">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="4" complex_decoder="1" TP_unrolled="4.72" TP_loop="4.62" uops="5">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+1*FP12+1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+2*FP23" TP_ports="1.00" TP_unrolled_indexed="2.00" TP_loop_indexed="2.00" uops_indexed="4" ports_indexed="1*FP123+2*FP23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHADDPS" category="AVX" cpl="3" extension="AVX" iclass="VHADDPS" iform="VHADDPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VHADDPS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VHADDPS_XMM_XMM_XMM.html" summary="Packed Single-FP Horizontal Add" url-ref="felixcloutier.com/x86/HADDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p15+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="3.58" TP_loop="3.16" uops="4">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode" latency="6" TP="2.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+1*FP12+1*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP123+2*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHADDPS" category="AVX" cpl="3" extension="AVX" iclass="VHADDPS" iform="VHADDPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VHADDPS (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VHADDPS_YMM_YMM_M256.html" summary="Packed Single-FP Horizontal Add" url-ref="felixcloutier.com/x86/HADDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc latency="13.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p15+1*p23A+2*p5" TP_ports="2.00" uops_retire_slots_indexed="4" uops_MITE_indexed="4" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="2.00" TP_loop_indexed="2.00" uops_indexed="4" ports_indexed="3*p15+1*p23A" TP_ports_indexed="1.50">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="8" complex_decoder="1" TP_unrolled="4.72" TP_loop="4.62" uops="9">
          <latency start_op="2" target_op="1" cycles="10"/>
          <latency start_op="3" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="11">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+1*FP12+1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP123+2*FP23" TP_ports="1.00" TP_unrolled_indexed="2.00" TP_loop_indexed="2.00" uops_indexed="4" ports_indexed="1*FP1+2*FP23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHADDPS" category="AVX" cpl="3" extension="AVX" iclass="VHADDPS" iform="VHADDPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VHADDPS (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VHADDPS_YMM_YMM_YMM.html" summary="Packed Single-FP Horizontal Add" url-ref="felixcloutier.com/x86/HADDPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p15+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="6" complex_decoder="1" TP_unrolled="3.63" TP_loop="3.16" uops="7">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="8">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="7" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode" latency="6" TP="3.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*FP1+1*FP12+1*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*FP123+2*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPD" iform="VHSUBPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VHSUBPD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VHSUBPD_XMM_XMM_M128.html" summary="Packed Double-FP Horizontal Subtract" url-ref="felixcloutier.com/x86/HSUBPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="11" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="11" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="11" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="2.0" latency="12.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p15+1*p23A+2*p5" TP_ports="2.00" uops_retire_slots_indexed="4" uops_MITE_indexed="4" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="2.00" TP_loop_indexed="2.00" uops_indexed="4" ports_indexed="3*p15+1*p23A" TP_ports_indexed="1.50">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="4" complex_decoder="1" TP_unrolled="4.73" TP_loop="4.62" uops="5">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+1*FP12+1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP123+2*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPD" iform="VHSUBPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VHSUBPD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VHSUBPD_XMM_XMM_XMM.html" summary="Packed Double-FP Horizontal Subtract" url-ref="felixcloutier.com/x86/HSUBPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc TP="2.0" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p15+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="3.58" TP_loop="3.16" uops="4">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode" latency="6" TP="2.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+1*FP12+1*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP123+2*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPD" iform="VHSUBPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VHSUBPD (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VHSUBPD_YMM_YMM_M256.html" summary="Packed Double-FP Horizontal Subtract" url-ref="felixcloutier.com/x86/HSUBPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="2.0" latency="13.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p15+1*p23A+2*p5" TP_ports="2.00" uops_retire_slots_indexed="4" uops_MITE_indexed="4" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="2.00" TP_loop_indexed="2.00" uops_indexed="4" ports_indexed="3*p15+1*p23A" TP_ports_indexed="1.50">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="8" complex_decoder="1" TP_unrolled="4.72" TP_loop="4.62" uops="9">
          <latency start_op="2" target_op="1" cycles="10"/>
          <latency start_op="3" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="10">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+1*FP12+1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP123+2*FP23" TP_ports="1.00" TP_unrolled_indexed="2.00" TP_loop_indexed="2.00" uops_indexed="4" ports_indexed="1*FP1+2*FP23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPD" iform="VHSUBPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VHSUBPD (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VHSUBPD_YMM_YMM_YMM.html" summary="Packed Double-FP Horizontal Subtract" url-ref="felixcloutier.com/x86/HSUBPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc TP="2.0" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p15+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="6" complex_decoder="1" TP_unrolled="3.63" TP_loop="3.16" uops="7">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="8">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="7" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode" latency="6" TP="3.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="2.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*FP1+1*FP12+1*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*FP123+2*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPS" iform="VHSUBPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VHSUBPS (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VHSUBPS_XMM_XMM_M128.html" summary="Packed Single-FP Horizontal Subtract" url-ref="felixcloutier.com/x86/HSUBPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="11" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="11" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="11" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc latency="12.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p15+1*p23A+2*p5" TP_ports="2.00" uops_retire_slots_indexed="4" uops_MITE_indexed="4" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="2.00" TP_loop_indexed="2.00" uops_indexed="4" ports_indexed="3*p15+1*p23A" TP_ports_indexed="1.50">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="4" complex_decoder="1" TP_unrolled="4.73" TP_loop="4.62" uops="5">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+1*FP12+1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+2*FP23" TP_ports="1.00" TP_unrolled_indexed="2.00" TP_loop_indexed="2.00" uops_indexed="4" ports_indexed="1*FP123+2*FP23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPS" iform="VHSUBPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VHSUBPS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VHSUBPS_XMM_XMM_XMM.html" summary="Packed Single-FP Horizontal Subtract" url-ref="felixcloutier.com/x86/HSUBPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p15+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="3.56" TP_loop="3.16" uops="4">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode" latency="6" TP="2.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+1*FP12+1*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP123+2*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPS" iform="VHSUBPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VHSUBPS (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VHSUBPS_YMM_YMM_M256.html" summary="Packed Single-FP Horizontal Subtract" url-ref="felixcloutier.com/x86/HSUBPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="12" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p1+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="4" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p1+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p01+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="0.75" fusion_occurred="1" uops="4" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc latency="13.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p01+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p15+1*p23A+2*p5" TP_ports="2.00" uops_retire_slots_indexed="4" uops_MITE_indexed="4" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="2" TP_unrolled_indexed="2.00" TP_loop_indexed="2.00" uops_indexed="4" ports_indexed="3*p15+1*p23A" TP_ports_indexed="1.50">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="8" complex_decoder="1" TP_unrolled="4.75" TP_loop="4.62" uops="9">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="10">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+1*FP12+1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP123+2*FP23" TP_ports="1.00" TP_unrolled_indexed="2.00" TP_loop_indexed="2.00" uops_indexed="4" ports_indexed="1*FP1+2*FP23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPS" iform="VHSUBPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VHSUBPS (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VHSUBPS_YMM_YMM_YMM.html" summary="Packed Single-FP Horizontal Subtract" url-ref="felixcloutier.com/x86/HSUBPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.95" uops="3" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p1+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="0.71" uops="3" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p01+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p15+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="6" complex_decoder="1" TP_unrolled="3.63" TP_loop="3.16" uops="7">
          <latency start_op="2" target_op="1" cycles="9"/>
          <latency start_op="3" target_op="1" cycles="9"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="8">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode" latency="6" TP="3.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="2.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*FP1+1*FP12+1*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc uops="ucode" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*FP123+2*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VINSERTF128" category="AVX" cpl="3" extension="AVX" iclass="VINSERTF128" iform="VINSERTF128_YMMqq_YMMqq_MEMdq_IMMb" isa-set="AVX" string="VINSERTF128 (YMM, YMM, M128, I8)" vex="1" url="uops.info/html-instr/VINSERTF128_YMM_YMM_M128_I8.html" summary="Insert Packed Floating-Point Values" url-ref="felixcloutier.com/x86/VINSERTF128:VINSERTF32x4:VINSERTF64x2:VINSERTF32x8:VINSERTF64x4.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p05+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p05+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p05+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p05+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="0.33" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.75" TP_loop="0.75" uops="2">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VINSERTF128" category="AVX" cpl="3" extension="AVX" iclass="VINSERTF128" iform="VINSERTF128_YMMqq_YMMqq_XMMdq_IMMb" isa-set="AVX" string="VINSERTF128 (YMM, YMM, XMM, I8)" vex="1" url="uops.info/html-instr/VINSERTF128_YMM_YMM_XMM_I8.html" summary="Insert Packed Floating-Point Values" url-ref="felixcloutier.com/x86/VINSERTF128:VINSERTF32x4:VINSERTF64x2:VINSERTF32x8:VINSERTF64x4.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.67" TP_unrolled="0.67" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1/3, FP0/1/3" latency="1" TP="0.67"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3, FP0/1/3" latency="2" TP="0.33"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VINSERTPS" category="AVX" cpl="3" extension="AVX" iclass="VINSERTPS" iform="VINSERTPS_XMMdq_XMMdq_MEMd_IMMb" isa-set="AVX" string="VINSERTPS (XMM, XMM, M32, I8)" vex="1" url="uops.info/html-instr/VINSERTPS_XMM_XMM_M32_I8.html" summary="Insert Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/INSERTPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VINSERTPS" category="AVX" cpl="3" extension="AVX" iclass="VINSERTPS" iform="VINSERTPS_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VINSERTPS (XMM, XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VINSERTPS_XMM_XMM_XMM_I8.html" summary="Insert Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/INSERTPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="1.0" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VLDDQU" category="AVX" cpl="3" extension="AVX" iclass="VLDDQU" iform="VLDDQU_XMMdq_MEMdq" isa-set="AVX" string="VLDDQU (XMM, M128)" vex="1" url="uops.info/html-instr/VLDDQU_XMM_M128.html" summary="Load Unaligned Integer 128 Bits" url-ref="felixcloutier.com/x86/LDDQU.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.49" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.49" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_is_upper_bound="1" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.49" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.49" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="LD" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="LD" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="LD" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VLDDQU" category="AVX" cpl="3" extension="AVX" iclass="VLDDQU" iform="VLDDQU_YMMqq_MEMqq" isa-set="AVX" string="VLDDQU (YMM, M256)" vex="1" url="uops.info/html-instr/VLDDQU_YMM_M256.html" summary="Load Unaligned Integer 128 Bits" url-ref="felixcloutier.com/x86/LDDQU.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.49" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.49" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.49" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" fusion_occurred="1" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.49" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="LD" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="LD" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VLDMXCSR" category="AVX" cpl="3" extension="AVX" iclass="VLDMXCSR" iform="VLDMXCSR_MEMd" isa-set="AVX" mxcsr="1" string="VLDMXCSR (M32)" vex="1" url="uops.info/html-instr/VLDMXCSR_M32.html" summary="Load MXCSR Register" url-ref="felixcloutier.com/x86/LDMXCSR.html">
      <operand idx="1" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="i32"/>
      <operand idx="2" name="REG0" suppressed="1" type="reg" w="1">MXCSR</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p23+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p23+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+1*p23+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="2.25" TP_ports="1.00" TP_unrolled="2.30" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3"/>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p23+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p23+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+1*p23+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="2.36" TP_ports="1.00" TP_unrolled="2.40" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3"/>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p237+1*p4+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p23+1*p4+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p0156+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p0156+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p0156+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p0156+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p0156+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p0156+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="2.25" TP_ports="1.00" TP_unrolled="2.30" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p06+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3"/>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p0+1*p0156+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p0156+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p0156+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p0156+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p0156+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p0156+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.30" TP_loop="2.25" uops="3" ports="1*p0+1*p06+1*p23" TP_ports="1.00"/>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p0156+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p0156+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p0156+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p0156+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="2.91" uops="4" ports="1*p0+1*p06+1*p23+1*p5" TP_ports="1.00"/>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p0156+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p0156+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p0156+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p0156+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="2.91" uops="4" ports="1*p0+1*p06+1*p23+1*p5" TP_ports="1.00"/>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.91" TP_ports="1.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="1*p0+1*p06+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4"/>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="2.91" uops="4" ports="1*p0+1*p06+1*p23+1*p5" TP_ports="1.00"/>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.20" uops="4" ports="1*p0+1*p015+1*p06+1*p23" TP_ports="1.00"/>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="2.62" TP_loop="2.91" uops="4" ports="1*p0+1*p06+1*p23+1*p5" TP_ports="1.00"/>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.40" TP_loop="3.38" uops="4" ports="1*p0+1*p015+1*p06+1*p23" TP_ports="1.00"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.40" TP_loop="3.38" uops="4" ports="1*p0+1*p015+1*p06+1*p23" TP_ports="1.00"/>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.40" TP_loop="3.37" uops="4" ports="1*p0+1*p015+1*p06+1*p23" TP_ports="1.00"/>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="11" uops_MITE="3" uops_MS="8" complex_decoder="1" available_simple_decoders="0" TP_unrolled="6.00" TP_loop="6.00" uops="3" ports="1*p0+1*p06+1*p23A" TP_ports="1.00"/>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="5" complex_decoder="1" TP_unrolled="8.00" TP_loop="7.87" uops="5"/>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP3" uops="1"/>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="1" ports="1*FP3" TP_ports="1.00"/>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.50" TP_loop="1.50" uops="1" ports="1*FP01" TP_ports="0.50"/>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="1" ports="1*FP01" TP_ports="0.50"/>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVDQU" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVDQU" iform="VMASKMOVDQU_XMMxub_XMMxub" isa-set="AVX" string="VMASKMOVDQU (XMM, XMM)" vex="1" url="uops.info/html-instr/VMASKMOVDQU_XMM_XMM.html" summary="Conditional SIMD Packed Loads and Stores" url-ref="felixcloutier.com/x86/VMASKMOV.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand base="RDI" idx="3" memory-prefix="xmmword ptr" name="MEM0" seg="DS" suppressed="1" type="mem" w="1" width="128" xtype="u8"/>
      <architecture name="SNB">
        <measurement TP_loop="6.00" TP_ports="2.00" TP_unrolled="6.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p05+1*p15+4*p23+2*p4+1*p5" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="22" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="6.00" TP_ports="2.00" TP_unrolled="6.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p05+1*p15+4*p23+2*p4+1*p5" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="21" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="6.00" TP_ports="2.00" TP_unrolled="6.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p06+4*p23+2*p4+1*p5" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="20" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="10" uops_MITE="0" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="6.00" TP_loop="6.00" uops="10" ports="1*p0+2*p06+4*p23+2*p4+1*p5" TP_ports="2.00">
          <latency start_op="3" target_op="3" cycles_addr="22" cycles_addr_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p237+1*p4" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.85" fusion_occurred="1" uops="2" ports="1*p0+1*p237+1*p4" TP_ports="1.00"/>
        <measurement uops_retire_slots="10" uops_MITE="0" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="6.00" TP_loop="6.00" uops="10" ports="1*p0+1*p06+4*p23+2*p4+2*p5" TP_ports="2.00">
          <latency start_op="3" target_op="3" cycles_addr="9" cycles_addr_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p237+1*p4" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.85" fusion_occurred="1" uops="2" ports="1*p0+1*p237+1*p4" TP_ports="1.00"/>
        <measurement uops_retire_slots="10" uops_MITE="0" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="6.00" TP_loop="6.00" uops="10" ports="1*p0+1*p06+4*p23+2*p4+2*p5" TP_ports="2.00">
          <latency start_op="3" target_op="3" cycles_addr="9" cycles_addr_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="6.00" TP_ports="2.00" TP_unrolled="6.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p06+4*p23+2*p4+2*p5" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="9" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="10" uops_MITE="0" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="6.00" TP_loop="6.00" uops="10" ports="1*p0+1*p06+4*p23+2*p4+2*p5" TP_ports="2.00">
          <latency start_op="3" target_op="3" cycles_addr="9" cycles_addr_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="10" uops_MITE="0" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="7.00" TP_loop="7.00" uops="10" ports="1*p0+1*p06+4*p23+2*p4+2*p5" TP_ports="2.00">
          <latency start_op="3" target_op="3" cycles_addr="9" cycles_addr_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="10" uops_MITE="0" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="6.00" TP_loop="6.00" uops="10" ports="1*p0+1*p06+4*p23+2*p4+2*p5" TP_ports="2.00">
          <latency start_op="3" target_op="3" cycles_addr="8" cycles_addr_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="10" uops_MITE="0" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="7.00" TP_loop="7.00" uops="10" ports="1*p0+1*p06+1*p15+2*p23+2*p49+1*p5+2*p78" TP_ports="1.00">
          <latency start_op="3" target_op="3" cycles_addr="12" cycles_addr_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="10" uops_MITE="0" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="7.00" TP_loop="7.00" uops="10" ports="1*p0+1*p06+1*p15+2*p23+2*p49+1*p5+2*p78" TP_ports="1.00">
          <latency start_op="3" target_op="3" cycles_addr="12" cycles_addr_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="10" uops_MITE="0" uops_MS="10" complex_decoder="1" available_simple_decoders="0" TP_unrolled="7.00" TP_loop="7.00" uops="10" ports="1*p0+1*p06+1*p15+2*p23+2*p49+1*p5+2*p78" TP_ports="1.00">
          <latency start_op="3" target_op="3" cycles_addr="12" cycles_addr_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="10" uops_MITE="4" uops_MS="6" complex_decoder="1" available_simple_decoders="0" TP_unrolled="6.00" TP_loop="6.00" uops="10" ports="1*p0+1*p06+1*p15+2*p23A+2*p49+1*p5+2*p78" TP_ports="1.00">
          <latency start_op="3" target_op="3" cycles_addr="10" cycles_addr_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="2" complex_decoder="1" TP_unrolled="3.53" TP_loop="3.15" uops="3">
          <latency start_op="3" target_op="3" cycles_addr="539" cycles_addr_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="18.00" TP_unrolled="18.00" uops="60">
          <latency cycles_addr="13" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="18.00" TP_loop="18.00" uops="60">
          <latency start_op="3" target_op="3" cycles_addr="13" cycles_addr_is_upper_bound="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="18.00" TP_loop="18.00" uops="75" ports="2*FP0123+4*FP1+2*FP12+4*FP45" TP_ports="4.00">
          <latency start_op="3" target_op="3" cycles_addr="20" cycles_addr_is_upper_bound="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="18.00" TP_loop="18.00" uops="75" ports="2*FP12+3*FP123+3*FP23+4*FP45" TP_ports="2.67">
          <latency start_op="3" target_op="3" cycles_addr="20" cycles_addr_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPD" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPD" iform="VMASKMOVPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VMASKMOVPD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VMASKMOVPD_XMM_XMM_M128.html" summary="Conditional SIMD Packed Loads and Stores" url-ref="felixcloutier.com/x86/VMASKMOV.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="8" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="0.97" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="0.33" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="12">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="0" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPD" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPD" iform="VMASKMOVPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VMASKMOVPD (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VMASKMOVPD_YMM_YMM_M256.html" summary="Conditional SIMD Packed Loads and Stores" url-ref="felixcloutier.com/x86/VMASKMOV.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="9" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="0.96" TP_loop="0.40" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="0.93" TP_loop_indexed="0.40" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="7.00" TP_unrolled="7.00" uops="24">
          <latency cycles="17" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="2" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPD" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPD" iform="VMASKMOVPD_MEMdq_XMMdq_XMMdq" isa-set="AVX" string="VMASKMOVPD (M128, XMM, XMM)" vex="1" url="uops.info/html-instr/VMASKMOVPD_M128_XMM_XMM.html" summary="Conditional SIMD Packed Loads and Stores" url-ref="felixcloutier.com/x86/VMASKMOV.html">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="3" ports="1*p01+1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p01+1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p01+1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="3" ports="1*p01+1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p01+1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p01+1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="1*p0+1*p1+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="4" cycles_addr_is_upper_bound="1" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.85" fusion_occurred="1" uops="2" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="0.76" uops_indexed="2" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.85" fusion_occurred="1" uops="2" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="0.76" uops_indexed="2" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="20" cycles_addr_is_upper_bound="1" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="16" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="10">
          <latency cycles_addr="3" cycles_addr_index="3" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="10">
          <latency start_op="1" target_op="1" cycles_addr="4" cycles_addr_is_upper_bound="1" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="10" ports="1*FP1+3*FP45" TP_ports="1.50">
          <latency start_op="1" target_op="1" cycles_addr="3" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="10" ports="1*FP23+3*FP45" TP_ports="1.50" TP_unrolled_indexed="4.00" TP_loop_indexed="4.00" uops_indexed="10" ports_indexed="1*FP1+3*FP45" TP_ports_indexed="1.50">
          <latency start_op="1" target_op="1" cycles_addr="4" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPD" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPD" iform="VMASKMOVPD_MEMqq_YMMqq_YMMqq" isa-set="AVX" string="VMASKMOVPD (M256, YMM, YMM)" vex="1" url="uops.info/html-instr/VMASKMOVPD_M256_YMM_YMM.html" summary="Conditional SIMD Packed Loads and Stores" url-ref="felixcloutier.com/x86/VMASKMOV.html">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p01+1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p01+1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p01+1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p01+1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p01+1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p01+1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p01+1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p01+1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p01+1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p01+1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p01+1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p01+1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="1*p0+1*p1+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="4" cycles_addr_is_upper_bound="1" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.85" fusion_occurred="1" uops="2" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="0.76" uops_indexed="2" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.85" fusion_occurred="1" uops="2" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="0.76" uops_indexed="2" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="20" cycles_addr_is_upper_bound="1" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="18" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="17" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="18">
          <latency cycles_addr="3" cycles_addr_index="3" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="6.00" TP_loop="6.00" uops="19">
          <latency start_op="1" target_op="1" cycles_addr="4" cycles_addr_is_upper_bound="1" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="6.00" TP_loop="6.00" uops="18" ports="3*FP1+5*FP45" TP_ports="3.00">
          <latency start_op="1" target_op="1" cycles_addr="3" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="6.00" TP_loop="6.00" uops="18" ports="3*FP1+5*FP45" TP_ports="3.00">
          <latency start_op="1" target_op="1" cycles_addr="4" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPS" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPS" iform="VMASKMOVPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VMASKMOVPS (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VMASKMOVPS_XMM_XMM_M128.html" summary="Conditional SIMD Packed Loads and Stores" url-ref="felixcloutier.com/x86/VMASKMOV.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="8" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="0.97" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="0.97" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="0.97" TP_loop_indexed="0.33" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="20">
          <latency cycles="16" start_op="2" target_op="1"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="0" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPS" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPS" iform="VMASKMOVPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VMASKMOVPS (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VMASKMOVPS_YMM_YMM_M256.html" summary="Conditional SIMD Packed Loads and Stores" url-ref="felixcloutier.com/x86/VMASKMOV.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p05+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p05+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="9" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="0.93" TP_loop="0.40" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="0.40" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="10.00" TP_unrolled="10.00" uops="36">
          <latency cycles="20" start_op="2" target_op="1"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPS" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPS" iform="VMASKMOVPS_MEMdq_XMMdq_XMMdq" isa-set="AVX" string="VMASKMOVPS (M128, XMM, XMM)" vex="1" url="uops.info/html-instr/VMASKMOVPS_M128_XMM_XMM.html" summary="Conditional SIMD Packed Loads and Stores" url-ref="felixcloutier.com/x86/VMASKMOV.html">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="3" ports="1*p01+1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p01+1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p01+1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="3" ports="1*p01+1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="1*p01+1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p01+1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p01+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="1*p0+1*p1+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="4" cycles_addr_is_upper_bound="1" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.85" fusion_occurred="1" uops="2" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="0.76" uops_indexed="2" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.85" fusion_occurred="1" uops="2" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="0.76" uops_indexed="2" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="20" cycles_addr_is_upper_bound="1" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="16" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="19">
          <latency cycles_addr="3" cycles_addr_index="3" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="6.00" TP_loop="6.00" uops="19">
          <latency start_op="1" target_op="1" cycles_addr="4" cycles_addr_is_upper_bound="1" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="6.00" TP_loop="6.00" uops="18" ports="3*FP1+5*FP45" TP_ports="3.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="6.00" TP_loop="6.00" uops="18" ports="1*FP123+2*FP23+5*FP45" TP_ports="2.50" TP_unrolled_indexed="6.00" TP_loop_indexed="6.00" uops_indexed="18" ports_indexed="3*FP1+5*FP45" TP_ports_indexed="3.00">
          <latency start_op="1" target_op="1" cycles_addr="4" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPS" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPS" iform="VMASKMOVPS_MEMqq_YMMqq_YMMqq" isa-set="AVX" string="VMASKMOVPS (M256, YMM, YMM)" vex="1" url="uops.info/html-instr/VMASKMOVPS_M256_YMM_YMM.html" summary="Conditional SIMD Packed Loads and Stores" url-ref="felixcloutier.com/x86/VMASKMOV.html">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p01+1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p01+1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p01+1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p01+1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p01+1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p01+1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="3" ports="1*p01+1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p01+1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p01+1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p01+1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p01+1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p01+1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="4" ports="1*p0+1*p15+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+1*p15+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="1*p0+1*p1+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="4" cycles_addr_is_upper_bound="1" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.85" fusion_occurred="1" uops="2" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="0.76" uops_indexed="2" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.85" fusion_occurred="1" uops="2" ports="1*p0+1*p237+1*p4" TP_ports="1.00" TP_indexed="0.76" uops_indexed="2" ports_indexed="1*p0+1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p23+1*p4" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p0+1*p49+1*p78" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="20" cycles_addr_is_upper_bound="1" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="18" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="17" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="12.00" TP_unrolled="12.00" uops="42">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="12.00" TP_loop="12.00" uops="44">
          <latency start_op="1" target_op="1" cycles_addr="4" cycles_addr_is_upper_bound="1" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="2" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="12.00" TP_loop="12.00" uops="42" ports="1*FP0123+4*FP1+2*FP12+9*FP45" TP_ports="4.50">
          <latency start_op="1" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="12.00" TP_loop="12.00" uops="42" ports="4*FP1+1*FP123+2*FP23+9*FP45" TP_ports="4.50">
          <latency start_op="1" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
          <latency start_op="3" target_op="1" cycles="0" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXPD" category="AVX" cpl="3" extension="AVX" iclass="VMAXPD" iform="VMAXPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VMAXPD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VMAXPD_XMM_XMM_M128.html" summary="Maximum of Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MAXPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXPD" category="AVX" cpl="3" extension="AVX" iclass="VMAXPD" iform="VMAXPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VMAXPD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMAXPD_XMM_XMM_XMM.html" summary="Maximum of Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MAXPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXPD" category="AVX" cpl="3" extension="AVX" iclass="VMAXPD" iform="VMAXPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VMAXPD (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VMAXPD_YMM_YMM_M256.html" summary="Maximum of Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MAXPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.54" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXPD" category="AVX" cpl="3" extension="AVX" iclass="VMAXPD" iform="VMAXPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VMAXPD (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VMAXPD_YMM_YMM_YMM.html" summary="Maximum of Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MAXPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXPS" category="AVX" cpl="3" extension="AVX" iclass="VMAXPS" iform="VMAXPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VMAXPS (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VMAXPS_XMM_XMM_M128.html" summary="Maximum of Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MAXPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXPS" category="AVX" cpl="3" extension="AVX" iclass="VMAXPS" iform="VMAXPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VMAXPS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMAXPS_XMM_XMM_XMM.html" summary="Maximum of Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MAXPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXPS" category="AVX" cpl="3" extension="AVX" iclass="VMAXPS" iform="VMAXPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VMAXPS (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VMAXPS_YMM_YMM_M256.html" summary="Maximum of Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MAXPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXPS" category="AVX" cpl="3" extension="AVX" iclass="VMAXPS" iform="VMAXPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VMAXPS (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VMAXPS_YMM_YMM_YMM.html" summary="Maximum of Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MAXPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXSD" category="AVX" cpl="3" extension="AVX" iclass="VMAXSD" iform="VMAXSD_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VMAXSD (XMM, XMM, M64)" vex="1" url="uops.info/html-instr/VMAXSD_XMM_XMM_M64.html" summary="Return Maximum Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MAXSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXSD" category="AVX" cpl="3" extension="AVX" iclass="VMAXSD" iform="VMAXSD_XMMdq_XMMdq_XMMq" isa-set="AVX" mxcsr="1" string="VMAXSD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMAXSD_XMM_XMM_XMM.html" summary="Return Maximum Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MAXSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXSS" category="AVX" cpl="3" extension="AVX" iclass="VMAXSS" iform="VMAXSS_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VMAXSS (XMM, XMM, M32)" vex="1" url="uops.info/html-instr/VMAXSS_XMM_XMM_M32.html" summary="Return Maximum Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MAXSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXSS" category="AVX" cpl="3" extension="AVX" iclass="VMAXSS" iform="VMAXSS_XMMdq_XMMdq_XMMd" isa-set="AVX" mxcsr="1" string="VMAXSS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMAXSS_XMM_XMM_XMM.html" summary="Return Maximum Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MAXSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINPD" category="AVX" cpl="3" extension="AVX" iclass="VMINPD" iform="VMINPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VMINPD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VMINPD_XMM_XMM_M128.html" summary="Minimum of Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MINPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.54" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINPD" category="AVX" cpl="3" extension="AVX" iclass="VMINPD" iform="VMINPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VMINPD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMINPD_XMM_XMM_XMM.html" summary="Minimum of Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MINPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINPD" category="AVX" cpl="3" extension="AVX" iclass="VMINPD" iform="VMINPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VMINPD (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VMINPD_YMM_YMM_M256.html" summary="Minimum of Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MINPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINPD" category="AVX" cpl="3" extension="AVX" iclass="VMINPD" iform="VMINPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VMINPD (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VMINPD_YMM_YMM_YMM.html" summary="Minimum of Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MINPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINPS" category="AVX" cpl="3" extension="AVX" iclass="VMINPS" iform="VMINPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VMINPS (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VMINPS_XMM_XMM_M128.html" summary="Minimum of Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MINPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.54" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINPS" category="AVX" cpl="3" extension="AVX" iclass="VMINPS" iform="VMINPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VMINPS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMINPS_XMM_XMM_XMM.html" summary="Minimum of Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MINPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINPS" category="AVX" cpl="3" extension="AVX" iclass="VMINPS" iform="VMINPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VMINPS (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VMINPS_YMM_YMM_M256.html" summary="Minimum of Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MINPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.54" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINPS" category="AVX" cpl="3" extension="AVX" iclass="VMINPS" iform="VMINPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VMINPS (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VMINPS_YMM_YMM_YMM.html" summary="Minimum of Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MINPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINSD" category="AVX" cpl="3" extension="AVX" iclass="VMINSD" iform="VMINSD_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VMINSD (XMM, XMM, M64)" vex="1" url="uops.info/html-instr/VMINSD_XMM_XMM_M64.html" summary="Return Minimum Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MINSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINSD" category="AVX" cpl="3" extension="AVX" iclass="VMINSD" iform="VMINSD_XMMdq_XMMdq_XMMq" isa-set="AVX" mxcsr="1" string="VMINSD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMINSD_XMM_XMM_XMM.html" summary="Return Minimum Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MINSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINSS" category="AVX" cpl="3" extension="AVX" iclass="VMINSS" iform="VMINSS_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VMINSS (XMM, XMM, M32)" vex="1" url="uops.info/html-instr/VMINSS_XMM_XMM_M32.html" summary="Return Minimum Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MINSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="9" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p1+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p1+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p1+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.56" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.54" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINSS" category="AVX" cpl="3" extension="AVX" iclass="VMINSS" iform="VMINSS_XMMdq_XMMdq_XMMd" isa-set="AVX" mxcsr="1" string="VMINSS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMINSS_XMM_XMM_XMM.html" summary="Return Minimum Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MINSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p1" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p1" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p1" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_XMMdq_MEMdq" isa-set="AVX" string="VMOVAPD (XMM, M128)" vex="1" url="uops.info/html-instr/VMOVAPD_XMM_M128.html" summary="Move Aligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_is_upper_bound="1" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_XMMdq_XMMdq_28" isa-set="AVX" string="VMOVAPD_28 (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVAPD_28_XMM_XMM.html" summary="Move Aligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.20" TP_loop="0.20" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_MEMdq_XMMdq" isa-set="AVX" string="VMOVAPD (M128, XMM)" vex="1" url="uops.info/html-instr/VMOVAPD_M128_XMM.html" summary="Move Aligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPD.html">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_XMMdq_XMMdq_29" isa-set="AVX" string="VMOVAPD_29 (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVAPD_29_XMM_XMM.html" summary="Move Aligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.27" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.20" TP_loop="0.20" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_YMMqq_MEMqq" isa-set="AVX" string="VMOVAPD (YMM, M256)" vex="1" url="uops.info/html-instr/VMOVAPD_YMM_M256.html" summary="Move Aligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_YMMqq_YMMqq_28" isa-set="AVX" string="VMOVAPD_28 (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVAPD_28_YMM_YMM.html" summary="Move Aligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_MEMqq_YMMqq" isa-set="AVX" string="VMOVAPD (M256, YMM)" vex="1" url="uops.info/html-instr/VMOVAPD_M256_YMM.html" summary="Move Aligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPD.html">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_YMMqq_YMMqq_29" isa-set="AVX" string="VMOVAPD_29 (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVAPD_29_YMM_YMM.html" summary="Move Aligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.27" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_XMMdq_MEMdq" isa-set="AVX" string="VMOVAPS (XMM, M128)" vex="1" url="uops.info/html-instr/VMOVAPS_XMM_M128.html" summary="Move Aligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_is_upper_bound="1" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_XMMdq_XMMdq_28" isa-set="AVX" string="VMOVAPS_28 (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVAPS_28_XMM_XMM.html" summary="Move Aligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.20" TP_loop="0.20" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_MEMdq_XMMdq" isa-set="AVX" string="VMOVAPS (M128, XMM)" vex="1" url="uops.info/html-instr/VMOVAPS_M128_XMM.html" summary="Move Aligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPS.html">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_XMMdq_XMMdq_29" isa-set="AVX" string="VMOVAPS_29 (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVAPS_29_XMM_XMM.html" summary="Move Aligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.27" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.20" TP_loop="0.20" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_YMMqq_MEMqq" isa-set="AVX" string="VMOVAPS (YMM, M256)" vex="1" url="uops.info/html-instr/VMOVAPS_YMM_M256.html" summary="Move Aligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.35" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_YMMqq_YMMqq_28" isa-set="AVX" string="VMOVAPS_28 (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVAPS_28_YMM_YMM.html" summary="Move Aligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_MEMqq_YMMqq" isa-set="AVX" string="VMOVAPS (M256, YMM)" vex="1" url="uops.info/html-instr/VMOVAPS_M256_YMM.html" summary="Move Aligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPS.html">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_YMMqq_YMMqq_29" isa-set="AVX" string="VMOVAPS_29 (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVAPS_29_YMM_YMM.html" summary="Move Aligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVAPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.27" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVD" iform="VMOVD_XMMdq_MEMd" isa-set="AVX" string="VMOVD (XMM, M32)" vex="1" url="uops.info/html-instr/VMOVD_XMM_M32.html" summary="Move Doubleword/Move Quadword" url-ref="felixcloutier.com/x86/MOVD:MOVQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_is_upper_bound="1" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVD" iform="VMOVD_XMMdq_GPR32d" isa-set="AVX" string="VMOVD (XMM, R32)" vex="1" url="uops.info/html-instr/VMOVD_XMM_R32.html" summary="Move Doubleword/Move Quadword" url-ref="felixcloutier.com/x86/MOVD:MOVQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="ALU2" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="ALU2" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="ALU2" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVD" iform="VMOVD_MEMd_XMMd" isa-set="AVX" string="VMOVD (M32, XMM)" vex="1" url="uops.info/html-instr/VMOVD_M32_XMM.html" summary="Move Doubleword/Move Quadword" url-ref="felixcloutier.com/x86/MOVD:MOVQ.html">
      <operand idx="1" memory-prefix="dword ptr" name="MEM0" type="mem" w="1" width="32" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="32" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVD" iform="VMOVD_GPR32d_XMMd" isa-set="AVX" string="VMOVD (R32, XMM)" vex="1" url="uops.info/html-instr/VMOVD_R32_XMM.html" summary="Move Doubleword/Move Quadword" url-ref="felixcloutier.com/x86/MOVD:MOVQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="ALU0" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="ALU0" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="ALU0" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVDDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDDUP" iform="VMOVDDUP_XMMdq_MEMq" isa-set="AVX" string="VMOVDDUP (XMM, M64)" vex="1" url="uops.info/html-instr/VMOVDDUP_XMM_M64.html" summary="Replicate Double FP Values" url-ref="felixcloutier.com/x86/MOVDDUP.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVDDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDDUP" iform="VMOVDDUP_XMMdq_XMMq" isa-set="AVX" string="VMOVDDUP (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVDDUP_XMM_XMM.html" summary="Replicate Double FP Values" url-ref="felixcloutier.com/x86/MOVDDUP.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVDDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDDUP" iform="VMOVDDUP_YMMqq_MEMqq" isa-set="AVX" string="VMOVDDUP (YMM, M256)" vex="1" url="uops.info/html-instr/VMOVDDUP_YMM_M256.html" summary="Replicate Double FP Values" url-ref="felixcloutier.com/x86/MOVDDUP.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVDDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDDUP" iform="VMOVDDUP_YMMqq_YMMqq" isa-set="AVX" string="VMOVDDUP (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVDDUP_YMM_YMM.html" summary="Replicate Double FP Values" url-ref="felixcloutier.com/x86/MOVDDUP.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP1/2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_XMMdq_MEMdq" isa-set="AVX" string="VMOVDQA (XMM, M128)" vex="1" url="uops.info/html-instr/VMOVDQA_XMM_M128.html" summary="Move Aligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_is_upper_bound="1" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_XMMdq_XMMdq_6F" isa-set="AVX" string="VMOVDQA_6F (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVDQA_6F_XMM_XMM.html" summary="Move Aligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.20" TP_loop="0.20" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_MEMdq_XMMdq" isa-set="AVX" string="VMOVDQA (M128, XMM)" vex="1" url="uops.info/html-instr/VMOVDQA_M128_XMM.html" summary="Move Aligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_XMMdq_XMMdq_7F" isa-set="AVX" string="VMOVDQA_7F (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVDQA_7F_XMM_XMM.html" summary="Move Aligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.27" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.20" TP_loop="0.20" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_YMMqq_MEMqq" isa-set="AVX" string="VMOVDQA (YMM, M256)" vex="1" url="uops.info/html-instr/VMOVDQA_YMM_M256.html" summary="Move Aligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.35" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_YMMqq_YMMqq_6F" isa-set="AVX" string="VMOVDQA_6F (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVDQA_6F_YMM_YMM.html" summary="Move Aligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.38" TP_loop_same_reg="1.00" TP_ports_same_reg="0.50" TP_unrolled="0.38" TP_unrolled_same_reg="1.00" ports_same_reg="1*p05" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_MEMqq_YMMqq" isa-set="AVX" string="VMOVDQA (M256, YMM)" vex="1" url="uops.info/html-instr/VMOVDQA_M256_YMM.html" summary="Move Aligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_YMMqq_YMMqq_7F" isa-set="AVX" string="VMOVDQA_7F (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVDQA_7F_YMM_YMM.html" summary="Move Aligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.38" TP_loop_same_reg="1.00" TP_ports_same_reg="0.50" TP_unrolled="0.38" TP_unrolled_same_reg="1.00" ports_same_reg="1*p05" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.27" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_XMMdq_MEMdq" isa-set="AVX" string="VMOVDQU (XMM, M128)" vex="1" url="uops.info/html-instr/VMOVDQU_XMM_M128.html" summary="Move Unaligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_is_upper_bound="1" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_XMMdq_XMMdq_6F" isa-set="AVX" string="VMOVDQU_6F (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVDQU_6F_XMM_XMM.html" summary="Move Unaligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.20" TP_loop="0.20" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_YMMqq_MEMqq" isa-set="AVX" string="VMOVDQU (YMM, M256)" vex="1" url="uops.info/html-instr/VMOVDQU_YMM_M256.html" summary="Move Unaligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.35" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_YMMqq_YMMqq_6F" isa-set="AVX" string="VMOVDQU_6F (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVDQU_6F_YMM_YMM.html" summary="Move Unaligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.38" TP_loop_same_reg="1.00" TP_ports_same_reg="0.50" TP_unrolled="0.38" TP_unrolled_same_reg="1.00" ports_same_reg="1*p05" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_MEMdq_XMMdq" isa-set="AVX" string="VMOVDQU (M128, XMM)" vex="1" url="uops.info/html-instr/VMOVDQU_M128_XMM.html" summary="Move Unaligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_XMMdq_XMMdq_7F" isa-set="AVX" string="VMOVDQU_7F (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVDQU_7F_XMM_XMM.html" summary="Move Unaligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.27" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.20" TP_loop="0.20" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_MEMqq_YMMqq" isa-set="AVX" string="VMOVDQU (M256, YMM)" vex="1" url="uops.info/html-instr/VMOVDQU_M256_YMM.html" summary="Move Unaligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_YMMqq_YMMqq_7F" isa-set="AVX" string="VMOVDQU_7F (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVDQU_7F_YMM_YMM.html" summary="Move Unaligned Packed Integer Values" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p05" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.38" TP_loop_same_reg="1.00" TP_ports_same_reg="0.50" TP_unrolled="0.38" TP_unrolled_same_reg="1.00" ports_same_reg="1*p05" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.27" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVHLPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVHLPS" iform="VMOVHLPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VMOVHLPS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVHLPS_XMM_XMM_XMM.html" summary="Move Packed Single-Precision Floating-Point Values High to Low" url-ref="felixcloutier.com/x86/MOVHLPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="1.0" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVHPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVHPD" iform="VMOVHPD_XMMdq_XMMq_MEMq" isa-set="AVX" string="VMOVHPD (XMM, XMM, M64)" vex="1" url="uops.info/html-instr/VMOVHPD_XMM_XMM_M64.html" summary="Move High Packed Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MOVHPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVHPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVHPD" iform="VMOVHPD_MEMq_XMMdq" isa-set="AVX" string="VMOVHPD (M64, XMM)" vex="1" url="uops.info/html-instr/VMOVHPD_M64_XMM.html" summary="Move High Packed Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MOVHPD.html">
      <operand idx="1" memory-prefix="qword ptr" name="MEM0" type="mem" w="1" width="64" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP1/2, FP2" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2, FP2" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP12+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="FP1/2, FP4" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP123+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVHPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVHPS" iform="VMOVHPS_XMMdq_XMMq_MEMq" isa-set="AVX" string="VMOVHPS (XMM, XMM, M64)" vex="1" url="uops.info/html-instr/VMOVHPS_XMM_XMM_M64.html" summary="Move High Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVHPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVHPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVHPS" iform="VMOVHPS_MEMq_XMMdq" isa-set="AVX" string="VMOVHPS (M64, XMM)" vex="1" url="uops.info/html-instr/VMOVHPS_M64_XMM.html" summary="Move High Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVHPS.html">
      <operand idx="1" memory-prefix="qword ptr" name="MEM0" type="mem" w="1" width="64" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP1/2, FP2" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2, FP2" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP12+1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="2" ports="FP1/2, FP4" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*FP123+1*FP45" TP_ports="0.50" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*FP23+1*FP45" TP_ports_indexed="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVLHPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVLHPS" iform="VMOVLHPS_XMMdq_XMMq_XMMq" isa-set="AVX" string="VMOVLHPS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVLHPS_XMM_XMM_XMM.html" summary="Move Packed Single-Precision Floating-Point Values Low to High" url-ref="felixcloutier.com/x86/MOVLHPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="1.0" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP1/2, FP2" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2, FP2" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="2" ports="FP1/2, FP2" latency="2" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVLPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVLPD" iform="VMOVLPD_XMMdq_XMMdq_MEMq" isa-set="AVX" string="VMOVLPD (XMM, XMM, M64)" vex="1" url="uops.info/html-instr/VMOVLPD_XMM_XMM_M64.html" summary="Move Low Packed Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MOVLPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVLPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVLPD" iform="VMOVLPD_MEMq_XMMq" isa-set="AVX" string="VMOVLPD (M64, XMM)" vex="1" url="uops.info/html-instr/VMOVLPD_M64_XMM.html" summary="Move Low Packed Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MOVLPD.html">
      <operand idx="1" memory-prefix="qword ptr" name="MEM0" type="mem" w="1" width="64" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP4" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVLPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVLPS" iform="VMOVLPS_XMMdq_XMMdq_MEMq" isa-set="AVX" string="VMOVLPS (XMM, XMM, M64)" vex="1" url="uops.info/html-instr/VMOVLPS_XMM_XMM_M64.html" summary="Move Low Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVLPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVLPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVLPS" iform="VMOVLPS_MEMq_XMMq" isa-set="AVX" string="VMOVLPS (M64, XMM)" vex="1" url="uops.info/html-instr/VMOVLPS_M64_XMM.html" summary="Move Low Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVLPS.html">
      <operand idx="1" memory-prefix="qword ptr" name="MEM0" type="mem" w="1" width="64" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="64" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP4" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVMSKPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVMSKPD" iform="VMOVMSKPD_GPR32d_XMMdq" isa-set="AVX" string="VMOVMSKPD (R32, XMM)" vex="1" url="uops.info/html-instr/VMOVMSKPD_R32_XMM.html" summary="Extract Packed Double-Precision Floating-Point Sign Mask" url-ref="felixcloutier.com/x86/MOVMSKPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVMSKPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVMSKPD" iform="VMOVMSKPD_GPR32d_YMMqq" isa-set="AVX" string="VMOVMSKPD (R32, YMM)" vex="1" url="uops.info/html-instr/VMOVMSKPD_R32_YMM.html" summary="Extract Packed Double-Precision Floating-Point Sign Mask" url-ref="felixcloutier.com/x86/MOVMSKPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVMSKPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVMSKPS" iform="VMOVMSKPS_GPR32d_XMMdq" isa-set="AVX" string="VMOVMSKPS (R32, XMM)" vex="1" url="uops.info/html-instr/VMOVMSKPS_R32_XMM.html" summary="Extract Packed Single-Precision Floating-Point Sign Mask" url-ref="felixcloutier.com/x86/MOVMSKPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVMSKPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVMSKPS" iform="VMOVMSKPS_GPR32d_YMMqq" isa-set="AVX" string="VMOVMSKPS (R32, YMM)" vex="1" url="uops.info/html-instr/VMOVMSKPS_R32_YMM.html" summary="Extract Packed Single-Precision Floating-Point Sign Mask" url-ref="felixcloutier.com/x86/MOVMSKPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVNTDQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVNTDQ" iform="VMOVNTDQ_MEMdq_XMMdq" isa-set="AVX" string="VMOVNTDQ (M128, XMM)" vex="1" url="uops.info/html-instr/VMOVNTDQ_M128_XMM.html" summary="Store Packed Integers Using Non-Temporal Hint" url-ref="felixcloutier.com/x86/MOVNTDQ.html">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="355" cycles_addr_index="355" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="357" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="329" cycles_addr_index="330" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="312" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.02" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="353" cycles_addr_index="353" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="363" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="279" cycles_addr_is_upper_bound="1" cycles_addr_index="279" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="280" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="381" cycles_addr_is_upper_bound="1" cycles_addr_index="382" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="385" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="918" cycles_addr_is_upper_bound="1" cycles_addr_index="918" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="917" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="394" cycles_addr_index="404" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="402" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="494" cycles_addr_is_upper_bound="1" cycles_addr_index="494" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="495" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="385" cycles_addr_is_upper_bound="1" cycles_addr_index="381" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="362" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="821" cycles_addr_is_upper_bound="1" cycles_addr_index="821" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="775" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.10" TP_loop="1.04" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="393" cycles_addr_is_upper_bound="1" cycles_addr_index="394" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="388" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.30" TP_loop="4.89" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="575" cycles_addr_is_upper_bound="1" cycles_addr_index="574" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="527" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.91" TP_loop="1.03" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="382" cycles_addr_is_upper_bound="1" cycles_addr_index="382" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="376" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="476" cycles_addr_is_upper_bound="1" cycles_addr_index="475" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="473" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="531" cycles_addr_is_upper_bound="1" cycles_addr_index="531" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="524" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="1225" cycles_addr_index="1224" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1226" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="1183" cycles_addr_is_upper_bound="1" cycles_addr_index="1193" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="1182" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="792" cycles_addr_is_upper_bound="1" cycles_addr_index="791" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="794" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP4" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="1003" cycles_addr_is_upper_bound="1" cycles_addr_index="1003" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="995" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVNTDQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVNTDQ" iform="VMOVNTDQ_MEMqq_YMMqq" isa-set="AVX" string="VMOVNTDQ (M256, YMM)" vex="1" url="uops.info/html-instr/VMOVNTDQ_M256_YMM.html" summary="Store Packed Integers Using Non-Temporal Hint" url-ref="felixcloutier.com/x86/MOVNTDQ.html">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="352" cycles_addr_index="351" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="357" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="311" cycles_addr_index="311" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="313" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.02" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="353" cycles_addr_index="353" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="363" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="279" cycles_addr_is_upper_bound="1" cycles_addr_index="279" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="282" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="381" cycles_addr_is_upper_bound="1" cycles_addr_index="381" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="384" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="918" cycles_addr_is_upper_bound="1" cycles_addr_index="905" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="919" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="397" cycles_addr_index="394" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="410" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="494" cycles_addr_is_upper_bound="1" cycles_addr_index="494" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="495" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="370" cycles_addr_is_upper_bound="1" cycles_addr_index="369" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="360" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="821" cycles_addr_is_upper_bound="1" cycles_addr_index="821" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="822" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.11" TP_loop="1.04" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="393" cycles_addr_is_upper_bound="1" cycles_addr_index="393" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="388" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.09" TP_loop="4.54" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="575" cycles_addr_is_upper_bound="1" cycles_addr_index="575" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="531" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.07" TP_loop="1.03" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="382" cycles_addr_is_upper_bound="1" cycles_addr_index="382" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="376" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="476" cycles_addr_is_upper_bound="1" cycles_addr_index="476" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="473" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="529" cycles_addr_is_upper_bound="1" cycles_addr_index="529" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="524" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="937" cycles_addr_index="1226" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1226" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP2" latency="1" TP="2.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="1192" cycles_addr_is_upper_bound="1" cycles_addr_index="1184" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="1182" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="792" cycles_addr_is_upper_bound="1" cycles_addr_index="792" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="797" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP4" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="997" cycles_addr_is_upper_bound="1" cycles_addr_index="1003" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="997" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVNTDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVNTDQA" iform="VMOVNTDQA_XMMdq_MEMdq" isa-set="AVX" string="VMOVNTDQA (XMM, M128)" vex="1" url="uops.info/html-instr/VMOVNTDQA_XMM_M128.html" summary="Load Double Quadword Non-Temporal Aligned Hint" url-ref="felixcloutier.com/x86/MOVNTDQA.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="1.00" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_is_upper_bound="1" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVNTPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVNTPD" iform="VMOVNTPD_MEMdq_XMMdq" isa-set="AVX" string="VMOVNTPD (M128, XMM)" vex="1" url="uops.info/html-instr/VMOVNTPD_M128_XMM.html" summary="Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint" url-ref="felixcloutier.com/x86/MOVNTPD.html">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="356" cycles_addr_index="356" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="357" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="311" cycles_addr_index="311" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="312" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.02" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="354" cycles_addr_index="354" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="364" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="279" cycles_addr_is_upper_bound="1" cycles_addr_index="278" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="279" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="381" cycles_addr_is_upper_bound="1" cycles_addr_index="381" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="384" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.52" TP_loop="1.03" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.03" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="920" cycles_addr_is_upper_bound="1" cycles_addr_index="898" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="899" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="394" cycles_addr_index="395" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="402" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="493" cycles_addr_is_upper_bound="1" cycles_addr_index="493" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="494" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.02" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="363" cycles_addr_is_upper_bound="1" cycles_addr_index="369" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="359" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.98" TP_loop="1.02" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.33" TP_loop_indexed="1.02" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="821" cycles_addr_is_upper_bound="1" cycles_addr_index="781" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="822" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.10" TP_loop="1.05" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="393" cycles_addr_is_upper_bound="1" cycles_addr_index="393" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="388" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.55" TP_loop="4.90" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="576" cycles_addr_is_upper_bound="1" cycles_addr_index="576" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="547" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.04" TP_loop="1.04" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="381" cycles_addr_is_upper_bound="1" cycles_addr_index="381" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="375" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="477" cycles_addr_is_upper_bound="1" cycles_addr_index="476" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="474" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="530" cycles_addr_is_upper_bound="1" cycles_addr_index="530" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="524" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="1224" cycles_addr_index="1229" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1225" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="1187" cycles_addr_is_upper_bound="1" cycles_addr_index="1183" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="1175" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="792" cycles_addr_is_upper_bound="1" cycles_addr_index="796" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="793" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP4" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="1003" cycles_addr_is_upper_bound="1" cycles_addr_index="1003" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="1000" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVNTPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVNTPD" iform="VMOVNTPD_MEMqq_YMMqq" isa-set="AVX" string="VMOVNTPD (M256, YMM)" vex="1" url="uops.info/html-instr/VMOVNTPD_M256_YMM.html" summary="Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint" url-ref="felixcloutier.com/x86/MOVNTPD.html">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="351" cycles_addr_index="350" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="357" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="313" cycles_addr_index="329" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="330" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.02" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="355" cycles_addr_index="354" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="364" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="278" cycles_addr_is_upper_bound="1" cycles_addr_index="278" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="279" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="381" cycles_addr_is_upper_bound="1" cycles_addr_index="381" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="385" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.53" TP_loop="1.03" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.52" TP_loop_indexed="1.03" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="920" cycles_addr_is_upper_bound="1" cycles_addr_index="920" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="922" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.02" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="393" cycles_addr_index="405" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="410" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="493" cycles_addr_is_upper_bound="1" cycles_addr_index="493" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="495" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="364" cycles_addr_is_upper_bound="1" cycles_addr_index="372" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="358" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.25" TP_loop="1.04" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.33" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="788" cycles_addr_is_upper_bound="1" cycles_addr_index="821" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="776" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.10" TP_loop="1.05" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="393" cycles_addr_is_upper_bound="1" cycles_addr_index="393" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="387" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.43" TP_loop="4.45" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="576" cycles_addr_is_upper_bound="1" cycles_addr_index="576" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="554" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.06" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="381" cycles_addr_is_upper_bound="1" cycles_addr_index="381" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="375" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="477" cycles_addr_is_upper_bound="1" cycles_addr_index="477" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="474" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="531" cycles_addr_is_upper_bound="1" cycles_addr_index="530" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="526" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="1228" cycles_addr_index="1226" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1225" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP2" latency="1" TP="2.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="1185" cycles_addr_is_upper_bound="1" cycles_addr_index="1184" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="1182" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="793" cycles_addr_is_upper_bound="1" cycles_addr_index="794" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="795" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP4" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="1003" cycles_addr_is_upper_bound="1" cycles_addr_index="1003" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="1001" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVNTPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVNTPS" iform="VMOVNTPS_MEMdq_XMMdq" isa-set="AVX" string="VMOVNTPS (M128, XMM)" vex="1" url="uops.info/html-instr/VMOVNTPS_M128_XMM.html" summary="Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint" url-ref="felixcloutier.com/x86/MOVNTPS.html">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="356" cycles_addr_index="356" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="358" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="329" cycles_addr_index="329" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="329" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.02" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="354" cycles_addr_index="354" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="364" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="279" cycles_addr_is_upper_bound="1" cycles_addr_index="278" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="279" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="382" cycles_addr_is_upper_bound="1" cycles_addr_index="381" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="385" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.53" TP_loop="1.03" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.53" TP_loop_indexed="1.03" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="920" cycles_addr_is_upper_bound="1" cycles_addr_index="909" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="920" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.02" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="399" cycles_addr_index="397" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="400" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="493" cycles_addr_is_upper_bound="1" cycles_addr_index="493" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="494" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="374" cycles_addr_is_upper_bound="1" cycles_addr_index="377" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="359" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.33" TP_loop="1.02" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.02" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="821" cycles_addr_is_upper_bound="1" cycles_addr_index="821" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="822" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.14" TP_loop="1.05" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="393" cycles_addr_is_upper_bound="1" cycles_addr_index="393" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="387" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.38" TP_loop="4.91" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="576" cycles_addr_is_upper_bound="1" cycles_addr_index="576" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="550" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.06" TP_loop="1.05" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="381" cycles_addr_is_upper_bound="1" cycles_addr_index="381" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="375" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="477" cycles_addr_is_upper_bound="1" cycles_addr_index="477" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="474" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="530" cycles_addr_is_upper_bound="1" cycles_addr_index="530" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="525" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="1228" cycles_addr_index="1226" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1226" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="1185" cycles_addr_is_upper_bound="1" cycles_addr_index="1184" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="1181" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="793" cycles_addr_is_upper_bound="1" cycles_addr_index="795" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="794" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP4" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="1003" cycles_addr_is_upper_bound="1" cycles_addr_index="1003" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="1000" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVNTPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVNTPS" iform="VMOVNTPS_MEMqq_YMMqq" isa-set="AVX" string="VMOVNTPS (M256, YMM)" vex="1" url="uops.info/html-instr/VMOVNTPS_M256_YMM.html" summary="Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint" url-ref="felixcloutier.com/x86/MOVNTPS.html">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="351" cycles_addr_index="351" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="357" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="329" cycles_addr_index="329" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="330" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.02" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="354" cycles_addr_index="354" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="364" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="279" cycles_addr_is_upper_bound="1" cycles_addr_index="278" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="280" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="382" cycles_addr_is_upper_bound="1" cycles_addr_index="381" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="385" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.50" TP_loop="1.03" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.03" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="920" cycles_addr_is_upper_bound="1" cycles_addr_index="911" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="922" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.02" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="394" cycles_addr_index="398" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="410" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="493" cycles_addr_is_upper_bound="1" cycles_addr_index="493" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="494" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.02" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="363" cycles_addr_is_upper_bound="1" cycles_addr_index="371" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="358" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.33" TP_loop="1.02" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.33" TP_loop_indexed="1.02" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="774" cycles_addr_is_upper_bound="1" cycles_addr_index="821" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="822" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.13" TP_loop="1.05" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="393" cycles_addr_is_upper_bound="1" cycles_addr_index="393" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="388" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="4.72" TP_loop="3.93" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="577" cycles_addr_is_upper_bound="1" cycles_addr_index="577" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="527" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.18" TP_loop="1.05" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="381" cycles_addr_is_upper_bound="1" cycles_addr_index="381" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="375" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="477" cycles_addr_is_upper_bound="1" cycles_addr_index="477" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="474" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="532" cycles_addr_is_upper_bound="1" cycles_addr_index="530" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="526" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="1228" cycles_addr_index="1225" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1229" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP2" latency="1" TP="2.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="1193" cycles_addr_is_upper_bound="1" cycles_addr_index="1184" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="1172" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="794" cycles_addr_is_upper_bound="1" cycles_addr_index="796" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="800" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="1" ports="FP4" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="1006" cycles_addr_is_upper_bound="1" cycles_addr_index="1003" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="998" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVQ" iform="VMOVQ_XMMdq_GPR64q" isa-set="AVX" string="VMOVQ (XMM, R64)" vex="1" url="uops.info/html-instr/VMOVQ_XMM_R64.html" summary="Move Doubleword/Move Quadword" url-ref="felixcloutier.com/x86/MOVD:MOVQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVQ" iform="VMOVQ_GPR64q_XMMq" isa-set="AVX" string="VMOVQ (R64, XMM)" vex="1" url="uops.info/html-instr/VMOVQ_R64_XMM.html" summary="Move Doubleword/Move Quadword" url-ref="felixcloutier.com/x86/MOVD:MOVQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVQ" iform="VMOVQ_XMMdq_MEMq_7E" isa-set="AVX" string="VMOVQ (XMM, M64)" vex="1" url="uops.info/html-instr/VMOVQ_XMM_M64.html" summary="Move Quadword" url-ref="felixcloutier.com/x86/MOVQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_is_upper_bound="1" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVQ" iform="VMOVQ_XMMdq_XMMq_7E" isa-set="AVX" string="VMOVQ_7E (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVQ_7E_XMM_XMM.html" summary="Move Quadword" url-ref="felixcloutier.com/x86/MOVQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.33" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.33" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.33" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.33" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.33" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.33" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.33" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVQ" iform="VMOVQ_MEMq_XMMq_D6" isa-set="AVX" string="VMOVQ (M64, XMM)" vex="1" url="uops.info/html-instr/VMOVQ_M64_XMM.html" summary="Move Quadword" url-ref="felixcloutier.com/x86/MOVQ.html">
      <operand idx="1" memory-prefix="qword ptr" name="MEM0" type="mem" w="1" width="64" xtype="i64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="64" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVQ" iform="VMOVQ_XMMdq_XMMq_D6" isa-set="AVX" string="VMOVQ_D6 (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVQ_D6_XMM_XMM.html" summary="Move Quadword" url-ref="felixcloutier.com/x86/MOVQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.33" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.33" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.33" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.33" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.33" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.33" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.33" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSD" iform="VMOVSD_XMMdq_MEMq" isa-set="AVX" string="VMOVSD (XMM, M64)" vex="1" url="uops.info/html-instr/VMOVSD_XMM_M64.html" summary="Move or Merge Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MOVSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_is_upper_bound="1" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVSD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSD" iform="VMOVSD_XMMdq_XMMdq_XMMq_10" isa-set="AVX" string="VMOVSD_10 (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVSD_10_XMM_XMM_XMM.html" summary="Move or Merge Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MOVSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSD" iform="VMOVSD_MEMq_XMMq" isa-set="AVX" string="VMOVSD (M64, XMM)" vex="1" url="uops.info/html-instr/VMOVSD_M64_XMM.html" summary="Move or Merge Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MOVSD.html">
      <operand idx="1" memory-prefix="qword ptr" name="MEM0" type="mem" w="1" width="64" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVSD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSD" iform="VMOVSD_XMMdq_XMMdq_XMMq_11" isa-set="AVX" string="VMOVSD_11 (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVSD_11_XMM_XMM_XMM.html" summary="Move or Merge Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MOVSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSHDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSHDUP" iform="VMOVSHDUP_XMMdq_MEMdq" isa-set="AVX" string="VMOVSHDUP (XMM, M128)" vex="1" url="uops.info/html-instr/VMOVSHDUP_XMM_M128.html" summary="Replicate Single FP Values" url-ref="felixcloutier.com/x86/MOVSHDUP.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSHDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSHDUP" iform="VMOVSHDUP_XMMdq_XMMdq" isa-set="AVX" string="VMOVSHDUP (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVSHDUP_XMM_XMM.html" summary="Replicate Single FP Values" url-ref="felixcloutier.com/x86/MOVSHDUP.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSHDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSHDUP" iform="VMOVSHDUP_YMMqq_MEMqq" isa-set="AVX" string="VMOVSHDUP (YMM, M256)" vex="1" url="uops.info/html-instr/VMOVSHDUP_YMM_M256.html" summary="Replicate Single FP Values" url-ref="felixcloutier.com/x86/MOVSHDUP.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSHDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSHDUP" iform="VMOVSHDUP_YMMqq_YMMqq" isa-set="AVX" string="VMOVSHDUP (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVSHDUP_YMM_YMM.html" summary="Replicate Single FP Values" url-ref="felixcloutier.com/x86/MOVSHDUP.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP1/2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSLDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSLDUP" iform="VMOVSLDUP_XMMdq_MEMdq" isa-set="AVX" string="VMOVSLDUP (XMM, M128)" vex="1" url="uops.info/html-instr/VMOVSLDUP_XMM_M128.html" summary="Replicate Single FP Values" url-ref="felixcloutier.com/x86/MOVSLDUP.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSLDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSLDUP" iform="VMOVSLDUP_XMMdq_XMMdq" isa-set="AVX" string="VMOVSLDUP (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVSLDUP_XMM_XMM.html" summary="Replicate Single FP Values" url-ref="felixcloutier.com/x86/MOVSLDUP.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSLDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSLDUP" iform="VMOVSLDUP_YMMqq_MEMqq" isa-set="AVX" string="VMOVSLDUP (YMM, M256)" vex="1" url="uops.info/html-instr/VMOVSLDUP_YMM_M256.html" summary="Replicate Single FP Values" url-ref="felixcloutier.com/x86/MOVSLDUP.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.35" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSLDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSLDUP" iform="VMOVSLDUP_YMMqq_YMMqq" isa-set="AVX" string="VMOVSLDUP (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVSLDUP_YMM_YMM.html" summary="Replicate Single FP Values" url-ref="felixcloutier.com/x86/MOVSLDUP.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP1/2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSS" iform="VMOVSS_XMMdq_MEMd" isa-set="AVX" string="VMOVSS (XMM, M32)" vex="1" url="uops.info/html-instr/VMOVSS_XMM_M32.html" summary="Move or Merge Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MOVSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_is_upper_bound="1" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVSS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSS" iform="VMOVSS_XMMdq_XMMdq_XMMd_10" isa-set="AVX" string="VMOVSS_10 (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVSS_10_XMM_XMM_XMM.html" summary="Move or Merge Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MOVSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSS" iform="VMOVSS_MEMd_XMMd" isa-set="AVX" string="VMOVSS (M32, XMM)" vex="1" url="uops.info/html-instr/VMOVSS_M32_XMM.html" summary="Move or Merge Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MOVSS.html">
      <operand idx="1" memory-prefix="dword ptr" name="MEM0" type="mem" w="1" width="32" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVSS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSS" iform="VMOVSS_XMMdq_XMMdq_XMMd_11" isa-set="AVX" string="VMOVSS_11 (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVSS_11_XMM_XMM_XMM.html" summary="Move or Merge Scalar Single-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MOVSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="0"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_XMMdq_MEMdq" isa-set="AVX" string="VMOVUPD (XMM, M128)" vex="1" url="uops.info/html-instr/VMOVUPD_XMM_M128.html" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_is_upper_bound="1" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_XMMdq_XMMdq_10" isa-set="AVX" string="VMOVUPD_10 (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVUPD_10_XMM_XMM.html" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.20" TP_loop="0.20" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_MEMdq_XMMdq" isa-set="AVX" string="VMOVUPD (M128, XMM)" vex="1" url="uops.info/html-instr/VMOVUPD_M128_XMM.html" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPD.html">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_XMMdq_XMMdq_11" isa-set="AVX" string="VMOVUPD_11 (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVUPD_11_XMM_XMM.html" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.27" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.20" TP_loop="0.20" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_YMMqq_MEMqq" isa-set="AVX" string="VMOVUPD (YMM, M256)" vex="1" url="uops.info/html-instr/VMOVUPD_YMM_M256.html" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_YMMqq_YMMqq_10" isa-set="AVX" string="VMOVUPD_10 (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVUPD_10_YMM_YMM.html" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_MEMqq_YMMqq" isa-set="AVX" string="VMOVUPD (M256, YMM)" vex="1" url="uops.info/html-instr/VMOVUPD_M256_YMM.html" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPD.html">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_YMMqq_YMMqq_11" isa-set="AVX" string="VMOVUPD_11 (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVUPD_11_YMM_YMM.html" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.27" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_XMMdq_MEMdq" isa-set="AVX" string="VMOVUPS (XMM, M128)" vex="1" url="uops.info/html-instr/VMOVUPS_XMM_M128.html" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="6" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="6" cycles_addr_is_upper_bound="1" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_XMMdq_XMMdq_10" isa-set="AVX" string="VMOVUPS_10 (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVUPS_10_XMM_XMM.html" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.20" TP_loop="0.20" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_MEMdq_XMMdq" isa-set="AVX" string="VMOVUPS (M128, XMM)" vex="1" url="uops.info/html-instr/VMOVUPS_M128_XMM.html" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPS.html">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_XMMdq_XMMdq_11" isa-set="AVX" string="VMOVUPS_11 (XMM, XMM)" vex="1" url="uops.info/html-instr/VMOVUPS_11_XMM_XMM.html" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.27" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.33" TP_loop_same_reg="0.33" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.20" TP_loop="0.20" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_YMMqq_MEMqq" isa-set="AVX" string="VMOVUPS (YMM, M256)" vex="1" url="uops.info/html-instr/VMOVUPS_YMM_M256.html" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="1.00" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="1" ports="1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="1" ports_indexed="1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p23" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p23A" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_YMMqq_YMMqq_10" isa-set="AVX" string="VMOVUPS_10 (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVUPS_10_YMM_YMM.html" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_MEMqq_YMMqq" isa-set="AVX" string="VMOVUPS (M256, YMM)" vex="1" url="uops.info/html-instr/VMOVUPS_M256_YMM.html" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPS.html">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="2.00" fusion_occurred="1" latency="5" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="2" ports="1*p23+2*p4" TP_ports="2.00" TP_indexed="2.00" uops_indexed="2" ports_indexed="1*p23+2*p4" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="5" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.91" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.90" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="5" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="0.92" fusion_occurred="1" uops="2" ports="1*p237+1*p4" TP_ports="1.00" TP_indexed="0.84" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p237+1*p4" TP_ports="1.00" uops_retire_slots_indexed="1" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p4" TP_ports_indexed="1.00">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p49+1*p78" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="4" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="7" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="1" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="8" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP45" TP_ports="0.50">
          <latency start_op="1" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1"/>
          <latency start_op="2" target_op="1" cycles="9" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_YMMqq_YMMqq_11" isa-set="AVX" string="VMOVUPS_11 (YMM, YMM)" vex="1" url="uops.info/html-instr/VMOVUPS_11_YMM_YMM.html" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MOVUPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.25" latency="1" TP_no_interiteration="0.24" uops="0"/>
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.25" TP_no_interiteration="0.24" uops="1"/>
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.62" TP_loop="0.56" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p5" TP_ports_same_reg="1.00">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.24" uops="1"/>
        <IACA version="3.0" TP="0.24" uops="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.27" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.27" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.25" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.25" TP_loop="0.20" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1" ports_same_reg="1*p015" TP_ports_same_reg="0.33">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.17" TP_loop="0.17" uops="0" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="1.00" TP_loop_same_reg="1.00" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="0" cycles_same_reg="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FPU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.17" uops="1">
          <latency start_op="2" target_op="1" cycles="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMPSADBW" category="AVX" cpl="3" extension="AVX" iclass="VMPSADBW" iform="VMPSADBW_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VMPSADBW (XMM, XMM, M128, I8)" vex="1" url="uops.info/html-instr/VMPSADBW_XMM_XMM_M128_I8.html" summary="Compute Multiple Packed Sums of Absolute Difference" url-ref="felixcloutier.com/x86/MPSADBW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u8"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="11" TP_no_interiteration="1.00" uops="4" ports="1*p0+2*p15+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+2*p15+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+2*p15+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+2*p15+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+2*p15+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="11" TP_no_interiteration="1.00" uops="4" ports="1*p0+2*p15+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="4" ports="1*p0+2*p15+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+2*p15+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="4" ports="1*p0+2*p15+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="4" ports_indexed="1*p0+2*p15+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="13" TP_no_interiteration="2.00" uops="4" ports="1*p0+1*p23+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p0+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p0+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p0+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p0+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" uops="4" ports="1*p0+1*p23+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="1*p0+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p0+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="1*p0+1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="1*p0+1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" uops="4" ports="1*p0+1*p23+2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*p0+1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p15+1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p15+1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p15+1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="1*p15+1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="3" uops_MITE_indexed="3" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="3" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="3" ports_indexed="2*p15+1*p23A" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="3.56" TP_loop="3.09" uops="4">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="6">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="6">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="6" ports="1*FP1+1*FP12+1*FP3" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="6" ports="1*FP0123+2*FP1" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMPSADBW" category="AVX" cpl="3" extension="AVX" iclass="VMPSADBW" iform="VMPSADBW_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VMPSADBW (XMM, XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VMPSADBW_XMM_XMM_XMM_I8.html" summary="Compute Multiple Packed Sums of Absolute Difference" url-ref="felixcloutier.com/x86/MPSADBW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="3" ports="1*p0+2*p15" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+2*p15" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+2*p15" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="3" ports="1*p0+2*p15" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="3" ports="1*p0+2*p15" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="3" ports="1*p0+2*p15" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="7" TP_no_interiteration="2.00" uops="3" ports="1*p0+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p0+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p0+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.95" uops="3" ports="1*p0+2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="1*p0+2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="1*p0+2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.94" uops="3" ports="1*p0+2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p0+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.96" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.96" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p15+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p15+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p15+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p15+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="2" complex_decoder="1" TP_unrolled="3.53" TP_loop="3.15" uops="3">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode" latency="4.5" TP="2.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="ucode" latency="ucode" TP="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP03+1*FP1+1*FP12+1*FP23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="ucode" latency="ucode" TP="ucode"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP01+2*FP12" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULPD" category="AVX" cpl="3" extension="AVX" iclass="VMULPD" iform="VMULPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VMULPD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VMULPD_XMM_XMM_M128.html" summary="Multiply Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MULPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="11" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.54" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULPD" category="AVX" cpl="3" extension="AVX" iclass="VMULPD" iform="VMULPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VMULPD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMULPD_XMM_XMM_XMM.html" summary="Multiply Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MULPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="5" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULPD" category="AVX" cpl="3" extension="AVX" iclass="VMULPD" iform="VMULPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VMULPD (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VMULPD_YMM_YMM_M256.html" summary="Multiply Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MULPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="12" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="12" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="12" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULPD" category="AVX" cpl="3" extension="AVX" iclass="VMULPD" iform="VMULPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VMULPD (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VMULPD_YMM_YMM_YMM.html" summary="Multiply Packed Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MULPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="5" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1" latency="4" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULPS" category="AVX" cpl="3" extension="AVX" iclass="VMULPS" iform="VMULPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VMULPS (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VMULPS_XMM_XMM_M128.html" summary="Multiply Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MULPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="11" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULPS" category="AVX" cpl="3" extension="AVX" iclass="VMULPS" iform="VMULPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VMULPS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMULPS_XMM_XMM_XMM.html" summary="Multiply Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MULPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="5" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULPS" category="AVX" cpl="3" extension="AVX" iclass="VMULPS" iform="VMULPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VMULPS (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VMULPS_YMM_YMM_M256.html" summary="Multiply Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MULPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="12" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="12" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="12" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.54" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.52" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULPS" category="AVX" cpl="3" extension="AVX" iclass="VMULPS" iform="VMULPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VMULPS (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VMULPS_YMM_YMM_YMM.html" summary="Multiply Packed Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MULPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="5" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP0/1" latency="3" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULSD" category="AVX" cpl="3" extension="AVX" iclass="VMULSD" iform="VMULSD_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VMULSD (XMM, XMM, M64)" vex="1" url="uops.info/html-instr/VMULSD_XMM_XMM_M64.html" summary="Multiply Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MULSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="11" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULSD" category="AVX" cpl="3" extension="AVX" iclass="VMULSD" iform="VMULSD_XMMdq_XMMdq_XMMq" isa-set="AVX" mxcsr="1" string="VMULSD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMULSD_XMM_XMM_XMM.html" summary="Multiply Scalar Double-Precision Floating-Point Value" url-ref="felixcloutier.com/x86/MULSD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="5" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="4" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULSS" category="AVX" cpl="3" extension="AVX" iclass="VMULSS" iform="VMULSS_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VMULSS (XMM, XMM, M32)" vex="1" url="uops.info/html-instr/VMULSS_XMM_XMM_M32.html" summary="Multiply Scalar Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MULSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="11" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.55" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.53" TP_loop="0.53" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.53" TP_loop_indexed="0.53" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.54" TP_loop="0.55" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.55" TP_loop_indexed="0.55" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULSS" category="AVX" cpl="3" extension="AVX" iclass="VMULSS" iform="VMULSS_XMMdq_XMMdq_XMMd" isa-set="AVX" mxcsr="1" string="VMULSS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VMULSS_XMM_XMM_XMM.html" summary="Multiply Scalar Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/MULSS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="5" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="3" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VORPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPD" iform="VORPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VORPD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VORPD_XMM_XMM_M128.html" summary="Bitwise Logical OR of Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ORPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.35" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VORPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPD" iform="VORPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VORPD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VORPD_XMM_XMM_XMM.html" summary="Bitwise Logical OR of Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ORPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VORPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPD" iform="VORPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VORPD (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VORPD_YMM_YMM_M256.html" summary="Bitwise Logical OR of Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ORPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="u64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.42" TP_loop="0.40" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VORPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPD" iform="VORPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VORPD (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VORPD_YMM_YMM_YMM.html" summary="Bitwise Logical OR of Packed Double Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ORPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VORPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPS" iform="VORPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VORPS (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VORPS_XMM_XMM_M128.html" summary="Bitwise Logical OR of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ORPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VORPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPS" iform="VORPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VORPS (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VORPS_XMM_XMM_XMM.html" summary="Bitwise Logical OR of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ORPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VORPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPS" iform="VORPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VORPS (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VORPS_YMM_YMM_M256.html" summary="Bitwise Logical OR of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ORPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="u32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.40" TP_loop="0.40" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.52" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VORPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPS" iform="VORPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VORPS (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VORPS_YMM_YMM_YMM.html" summary="Bitwise Logical OR of Packed Single Precision Floating-Point Values" url-ref="felixcloutier.com/x86/ORPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPABSB" category="AVX" cpl="3" extension="AVX" iclass="VPABSB" iform="VPABSB_XMMdq_MEMdq" isa-set="AVX" string="VPABSB (XMM, M128)" vex="1" url="uops.info/html-instr/VPABSB_XMM_M128.html" summary="Packed Absolute Value" url-ref="felixcloutier.com/x86/PABSB:PABSW:PABSD:PABSQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPABSB" category="AVX" cpl="3" extension="AVX" iclass="VPABSB" iform="VPABSB_XMMdq_XMMdq" isa-set="AVX" string="VPABSB (XMM, XMM)" vex="1" url="uops.info/html-instr/VPABSB_XMM_XMM.html" summary="Packed Absolute Value" url-ref="felixcloutier.com/x86/PABSB:PABSW:PABSD:PABSQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPABSD" category="AVX" cpl="3" extension="AVX" iclass="VPABSD" iform="VPABSD_XMMdq_MEMdq" isa-set="AVX" string="VPABSD (XMM, M128)" vex="1" url="uops.info/html-instr/VPABSD_XMM_M128.html" summary="Packed Absolute Value" url-ref="felixcloutier.com/x86/PABSB:PABSW:PABSD:PABSQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPABSD" category="AVX" cpl="3" extension="AVX" iclass="VPABSD" iform="VPABSD_XMMdq_XMMdq" isa-set="AVX" string="VPABSD (XMM, XMM)" vex="1" url="uops.info/html-instr/VPABSD_XMM_XMM.html" summary="Packed Absolute Value" url-ref="felixcloutier.com/x86/PABSB:PABSW:PABSD:PABSQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPABSW" category="AVX" cpl="3" extension="AVX" iclass="VPABSW" iform="VPABSW_XMMdq_MEMdq" isa-set="AVX" string="VPABSW (XMM, M128)" vex="1" url="uops.info/html-instr/VPABSW_XMM_M128.html" summary="Packed Absolute Value" url-ref="felixcloutier.com/x86/PABSB:PABSW:PABSD:PABSQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPABSW" category="AVX" cpl="3" extension="AVX" iclass="VPABSW" iform="VPABSW_XMMdq_XMMdq" isa-set="AVX" string="VPABSW (XMM, XMM)" vex="1" url="uops.info/html-instr/VPABSW_XMM_XMM.html" summary="Packed Absolute Value" url-ref="felixcloutier.com/x86/PABSB:PABSW:PABSD:PABSQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPACKSSDW" category="AVX" cpl="3" extension="AVX" iclass="VPACKSSDW" iform="VPACKSSDW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPACKSSDW (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPACKSSDW_XMM_XMM_M128.html" summary="Pack with Signed Saturation" url-ref="felixcloutier.com/x86/PACKSSWB:PACKSSDW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0" latency="9.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPACKSSDW" category="AVX" cpl="3" extension="AVX" iclass="VPACKSSDW" iform="VPACKSSDW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPACKSSDW (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPACKSSDW_XMM_XMM_XMM.html" summary="Pack with Signed Saturation" url-ref="felixcloutier.com/x86/PACKSSWB:PACKSSDW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPACKSSWB" category="AVX" cpl="3" extension="AVX" iclass="VPACKSSWB" iform="VPACKSSWB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPACKSSWB (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPACKSSWB_XMM_XMM_M128.html" summary="Pack with Signed Saturation" url-ref="felixcloutier.com/x86/PACKSSWB:PACKSSDW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0" latency="9.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPACKSSWB" category="AVX" cpl="3" extension="AVX" iclass="VPACKSSWB" iform="VPACKSSWB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPACKSSWB (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPACKSSWB_XMM_XMM_XMM.html" summary="Pack with Signed Saturation" url-ref="felixcloutier.com/x86/PACKSSWB:PACKSSDW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPACKUSDW" category="AVX" cpl="3" extension="AVX" iclass="VPACKUSDW" iform="VPACKUSDW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPACKUSDW (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPACKUSDW_XMM_XMM_M128.html" summary="Pack with Unsigned Saturation" url-ref="felixcloutier.com/x86/PACKUSDW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0" latency="9.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPACKUSDW" category="AVX" cpl="3" extension="AVX" iclass="VPACKUSDW" iform="VPACKUSDW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPACKUSDW (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPACKUSDW_XMM_XMM_XMM.html" summary="Pack with Unsigned Saturation" url-ref="felixcloutier.com/x86/PACKUSDW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <!-- VEX-encoded VPACKUSWB, memory-source form (XMM, XMM, M128): operand 1 is the
         write-only destination register, operand 2 a read-only XMM source and operand 3
         a read-only 128-bit memory source. Each <architecture> child holds the <IACA>
         records (where present), one <measurement> and, for ICL only, a <doc> element. -->
    <instruction asm="VPACKUSWB" category="AVX" cpl="3" extension="AVX" iclass="VPACKUSWB" iform="VPACKUSWB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPACKUSWB (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPACKUSWB_XMM_XMM_M128.html" summary="Pack with Unsigned Saturation" url-ref="felixcloutier.com/x86/PACKUSWB.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <!-- In each <measurement>, a <latency> child pairs start_op with target_op (operand idx
           values). For the memory operand (start_op="3") three values are recorded:
           cycles_addr, cycles_addr_index and cycles_mem; each carries an *_is_upper_bound="1"
           flag in every record of this entry. The *_indexed attributes on <measurement> and
           <IACA> are the counterparts of the unsuffixed ones, presumably for an indexed
           addressing mode (TODO confirm against the generator). -->
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0" latency="9.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <!-- NOTE(review): in the ADL-P measurement below, ports_indexed="1*p23A" accounts for only
           one of the uops_indexed="2" uops and TP_ports_indexed="0.33" differs from
           TP_loop_indexed="1.00", whereas every other record in this entry that has
           ports_indexed repeats its ports value (here "1*p23A+1*p5"). Possibly an incomplete
           port assignment for the indexed case; verify against the raw measurement. -->
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <!-- The ADL-E and ZEN+/ZEN2/ZEN3/ZEN4 records below report uops="1" for this memory form
           and carry no *_indexed attributes; the ADL-E record also has no ports or TP_ports. -->
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <!-- VEX-encoded VPACKUSWB, register-source form (XMM, XMM, XMM): operand 1 is the
         write-only destination, operands 2 and 3 are read-only XMM sources. Each
         <architecture> child records <IACA> results (where present), a <measurement> with
         one <latency> per source operand, and for ICL and ZEN+/ZEN2/ZEN3 a <doc> element. -->
    <instruction asm="VPACKUSWB" category="AVX" cpl="3" extension="AVX" iclass="VPACKUSWB" iform="VPACKUSWB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPACKUSWB (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPACKUSWB_XMM_XMM_XMM.html" summary="Pack with Unsigned Saturation" url-ref="felixcloutier.com/x86/PACKUSWB.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <!-- NOTE(review): in the HSW, BDW, SKL and SKX records the IACA 3.0 element has TP="0.98"
           while its own TP_ports and the measured TP values are 1.00; presumably recorded
           verbatim from the tool output (TODO confirm). -->
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <!-- From ICL through ADL-P the measured latency for both sources is 3 cycles (1 in the
           SNB to CLX records above); the ICL <doc> latency="3.0" agrees with the measurement. -->
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <!-- The ADL-E record below has no ports, TP_ports, uops_MITE or uops_retire_slots
           attributes; only uops_MS, the two TP values and uops are present. -->
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDB" category="AVX" cpl="3" extension="AVX" iclass="VPADDB" iform="VPADDB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDB (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPADDB_XMM_XMM_M128.html" summary="Add Packed Integers" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.35" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDB" category="AVX" cpl="3" extension="AVX" iclass="VPADDB" iform="VPADDB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDB (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPADDB_XMM_XMM_XMM.html" summary="Add Packed Integers" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDD" category="AVX" cpl="3" extension="AVX" iclass="VPADDD" iform="VPADDD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPADDD_XMM_XMM_M128.html" summary="Add Packed Integers" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDD" category="AVX" cpl="3" extension="AVX" iclass="VPADDD" iform="VPADDD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPADDD_XMM_XMM_XMM.html" summary="Add Packed Integers" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDQ" category="AVX" cpl="3" extension="AVX" iclass="VPADDQ" iform="VPADDQ_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDQ (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPADDQ_XMM_XMM_M128.html" summary="Add Packed Integers" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.96" TP_unrolled_indexed="0.96" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDQ" category="AVX" cpl="3" extension="AVX" iclass="VPADDQ" iform="VPADDQ_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDQ (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPADDQ_XMM_XMM_XMM.html" summary="Add Packed Integers" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDSB" category="AVX" cpl="3" extension="AVX" iclass="VPADDSB" iform="VPADDSB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDSB (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPADDSB_XMM_XMM_M128.html" summary="Add Packed Signed Integers with Signed Saturation" url-ref="felixcloutier.com/x86/PADDSB:PADDSW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDSB" category="AVX" cpl="3" extension="AVX" iclass="VPADDSB" iform="VPADDSB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDSB (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPADDSB_XMM_XMM_XMM.html" summary="Add Packed Signed Integers with Signed Saturation" url-ref="felixcloutier.com/x86/PADDSB:PADDSW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDSW" category="AVX" cpl="3" extension="AVX" iclass="VPADDSW" iform="VPADDSW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDSW (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPADDSW_XMM_XMM_M128.html" summary="Add Packed Signed Integers with Signed Saturation" url-ref="felixcloutier.com/x86/PADDSB:PADDSW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDSW" category="AVX" cpl="3" extension="AVX" iclass="VPADDSW" iform="VPADDSW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDSW (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPADDSW_XMM_XMM_XMM.html" summary="Add Packed Signed Integers with Signed Saturation" url-ref="felixcloutier.com/x86/PADDSB:PADDSW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDUSB" category="AVX" cpl="3" extension="AVX" iclass="VPADDUSB" iform="VPADDUSB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDUSB (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPADDUSB_XMM_XMM_M128.html" summary="Add Packed Unsigned Integers with Unsigned Saturation" url-ref="felixcloutier.com/x86/PADDUSB:PADDUSW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDUSB" category="AVX" cpl="3" extension="AVX" iclass="VPADDUSB" iform="VPADDUSB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDUSB (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPADDUSB_XMM_XMM_XMM.html" summary="Add Packed Unsigned Integers with Unsigned Saturation" url-ref="felixcloutier.com/x86/PADDUSB:PADDUSW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDUSW" category="AVX" cpl="3" extension="AVX" iclass="VPADDUSW" iform="VPADDUSW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDUSW (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPADDUSW_XMM_XMM_M128.html" summary="Add Packed Unsigned Integers with Unsigned Saturation" url-ref="felixcloutier.com/x86/PADDUSB:PADDUSW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u16"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDUSW" category="AVX" cpl="3" extension="AVX" iclass="VPADDUSW" iform="VPADDUSW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDUSW (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPADDUSW_XMM_XMM_XMM.html" summary="Add Packed Unsigned Integers with Unsigned Saturation" url-ref="felixcloutier.com/x86/PADDUSB:PADDUSW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDW" category="AVX" cpl="3" extension="AVX" iclass="VPADDW" iform="VPADDW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDW (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPADDW_XMM_XMM_M128.html" summary="Add Packed Integers" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.35" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDW" category="AVX" cpl="3" extension="AVX" iclass="VPADDW" iform="VPADDW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDW (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPADDW_XMM_XMM_XMM.html" summary="Add Packed Integers" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPALIGNR" category="AVX" cpl="3" extension="AVX" iclass="VPALIGNR" iform="VPALIGNR_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPALIGNR (XMM, XMM, M128, I8)" vex="1" url="uops.info/html-instr/VPALIGNR_XMM_XMM_M128_I8.html" summary="Packed Align Right" url-ref="felixcloutier.com/x86/PALIGNR.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u8"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPALIGNR" category="AVX" cpl="3" extension="AVX" iclass="VPALIGNR" iform="VPALIGNR_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPALIGNR (XMM, XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VPALIGNR_XMM_XMM_XMM_I8.html" summary="Packed Align Right" url-ref="felixcloutier.com/x86/PALIGNR.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="1.0" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPAND" category="LOGICAL" cpl="3" extension="AVX" iclass="VPAND" iform="VPAND_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPAND (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPAND_XMM_XMM_M128.html" summary="Logical AND" url-ref="felixcloutier.com/x86/PAND.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u128"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPAND" category="LOGICAL" cpl="3" extension="AVX" iclass="VPAND" iform="VPAND_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPAND (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPAND_XMM_XMM_XMM.html" summary="Logical AND" url-ref="felixcloutier.com/x86/PAND.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.34" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.34" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.34" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPANDN" category="LOGICAL" cpl="3" extension="AVX" iclass="VPANDN" iform="VPANDN_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPANDN (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPANDN_XMM_XMM_M128.html" summary="Logical AND NOT" url-ref="felixcloutier.com/x86/PANDN.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u128"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p015+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p015+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p015+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="2" ports="1*p015+1*p23A" TP_ports="0.33" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPANDN" category="LOGICAL" cpl="3" extension="AVX" iclass="VPANDN" iform="VPANDN_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPANDN (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPANDN_XMM_XMM_XMM.html" summary="Logical AND NOT" url-ref="felixcloutier.com/x86/PANDN.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.34" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.34" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.34" latency="1" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.34" TP_no_interiteration="0.35" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.34" uops="1" ports="1*p015" TP_ports="0.33"/>
        <IACA version="3.0" TP="0.33" uops="1" ports="1*p015" TP_ports="0.33"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*p015" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.25" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FPU" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPAVGB" category="AVX" cpl="3" extension="AVX" iclass="VPAVGB" iform="VPAVGB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPAVGB (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPAVGB_XMM_XMM_M128.html" summary="Average Packed Integers" url-ref="felixcloutier.com/x86/PAVGB:PAVGW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPAVGB" category="AVX" cpl="3" extension="AVX" iclass="VPAVGB" iform="VPAVGB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPAVGB (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPAVGB_XMM_XMM_XMM.html" summary="Average Packed Integers" url-ref="felixcloutier.com/x86/PAVGB:PAVGW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPAVGW" category="AVX" cpl="3" extension="AVX" iclass="VPAVGW" iform="VPAVGW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPAVGW (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPAVGW_XMM_XMM_M128.html" summary="Average Packed Integers" url-ref="felixcloutier.com/x86/PAVGB:PAVGW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u16"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPAVGW" category="AVX" cpl="3" extension="AVX" iclass="VPAVGW" iform="VPAVGW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPAVGW (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPAVGW_XMM_XMM_XMM.html" summary="Average Packed Integers" url-ref="felixcloutier.com/x86/PAVGB:PAVGW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPBLENDVB" category="AVX" cpl="3" extension="AVX" iclass="VPBLENDVB" iform="VPBLENDVB_XMMdq_XMMdq_MEMdq_XMMdq" isa-set="AVX" string="VPBLENDVB (XMM, XMM, M128, XMM)" vex="1" url="uops.info/html-instr/VPBLENDVB_XMM_XMM_M128_XMM.html" summary="Variable Blend Packed Bytes" url-ref="felixcloutier.com/x86/PBLENDVB.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i8"/>
      <operand idx="4" name="REG2" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="8" TP_no_interiteration="1.00" uops="3" ports="2*p15+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p15+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p15+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p15+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p15+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p15+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="8" TP_no_interiteration="1.00" uops="3" ports="2*p15+1*p23" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="3" ports="2*p15+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p15+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="3" ports="2*p15+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="3" ports_indexed="2*p15+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p15+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="9" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" fusion_occurred="1" uops="3" ports="1*p23+2*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="3" ports_indexed="1*p23+2*p5" TP_ports_indexed="2.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*p23+2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.95" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="0.95" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="2.00" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.95" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="0.95" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="3" ports="2*p015+1*p23" TP_ports="0.67" TP_indexed="2.00" uops_indexed="3" ports_indexed="2*p015+1*p23" TP_ports_indexed="0.67"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="2*p015+1*p23" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="4" ports="3*p015+1*p23A" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="3.53" TP_loop="3.17" uops="4">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPBLENDVB" category="AVX" cpl="3" extension="AVX" iclass="VPBLENDVB" iform="VPBLENDVB_XMMdq_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPBLENDVB (XMM, XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPBLENDVB_XMM_XMM_XMM_XMM.html" summary="Variable Blend Packed Bytes" url-ref="felixcloutier.com/x86/PBLENDVB.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="REG3" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="2*p15" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="2*p15" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="2*p15" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="2" TP_no_interiteration="1.00" uops="2" ports="2*p15" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="2*p15" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="2*p15" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="2" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.90" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="2" ports="2*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.90" uops="2" ports="2*p5" TP_ports="2.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="2.00" TP_loop="2.00" uops="2" ports="2*p5" TP_ports="2.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="2"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <IACA version="3.0" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <IACA version="3.0" TP="0.95" uops="2" ports="2*p015" TP_ports="0.67"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="2*p015" TP_ports="0.67">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="3" ports="3*p015" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
          <latency start_op="4" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="3" complex_decoder="1" TP_unrolled="3.56" TP_loop="3.23" uops="4">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
          <latency start_op="4" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
          <latency start_op="4" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPBLENDW" category="AVX" cpl="3" extension="AVX" iclass="VPBLENDW" iform="VPBLENDW_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPBLENDW (XMM, XMM, M128, I8)" vex="1" url="uops.info/html-instr/VPBLENDW_XMM_XMM_M128_I8.html" summary="Blend Packed Words" url-ref="felixcloutier.com/x86/PBLENDW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u16"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="0.50" uops="2" ports="1*p15+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="0.95" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPBLENDW" category="AVX" cpl="3" extension="AVX" iclass="VPBLENDW" iform="VPBLENDW_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPBLENDW (XMM, XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VPBLENDW_XMM_XMM_XMM_I8.html" summary="Blend Packed Words" url-ref="felixcloutier.com/x86/PBLENDW.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.45" TP_ports="0.33" TP_unrolled="0.37" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.38" TP_loop="0.38" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/3" latency="1" TP="0.33"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.37" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.37" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCLMULQDQ" category="AVX" cpl="3" extension="AVX" iclass="VPCLMULQDQ" iform="VPCLMULQDQ_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPCLMULQDQ (XMM, XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VPCLMULQDQ_XMM_XMM_XMM_I8.html" summary="Carry-Less Multiplication Quadword" url-ref="felixcloutier.com/x86/PCLMULQDQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <measurement TP_loop="8.00" TP_ports="6.00" TP_unrolled="8.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+4*p015+3*p05+4*p1+2*p15+3*p5" uops="18" uops_MITE="2" uops_MS="16" uops_retire_slots="18">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="15" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="8.00" TP_ports="6.00" TP_unrolled="8.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+4*p015+3*p05+4*p1+2*p15+3*p5" uops="18" uops_MITE="2" uops_MS="16" uops_retire_slots="18">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles="14" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="7" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <IACA version="2.3" TP="2.00" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <IACA version="3.0" TP="1.95" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="7" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.99" uops="1" ports="1*p0" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.99" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.99" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="7" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles="7"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
        <doc TP="1.0" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP0123+1*FP1+1*FP12" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP01+1*FP123+1*FP23" TP_ports="0.75">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles="4"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCLMULQDQ" category="AVX" cpl="3" extension="AVX" iclass="VPCLMULQDQ" iform="VPCLMULQDQ_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPCLMULQDQ (XMM, XMM, M128, I8)" vex="1" url="uops.info/html-instr/VPCLMULQDQ_XMM_XMM_M128_I8.html" summary="Carry-Less Multiplication Quadword" url-ref="felixcloutier.com/x86/PCLMULQDQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <measurement TP_loop="8.00" TP_ports="5.67" TP_unrolled="8.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+3*p015+3*p05+4*p1+2*p15+1*p23+3*p5" uops="18" uops_MITE="2" uops_MS="16" uops_retire_slots="18">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="8.00" TP_ports="5.67" TP_unrolled="8.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+3*p015+3*p05+4*p1+2*p15+1*p23+3*p5" uops="18" uops_MITE="2" uops_MS="16" uops_retire_slots="18">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="7" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p5" TP_ports="2.00"/>
        <IACA version="2.2" TP="2.00" fusion_occurred="1" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p23+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="2*p0+1*p23+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="2.3" TP="2.00" fusion_occurred="1" uops="4" ports="2*p0+1*p23+1*p5" TP_ports="2.00" TP_indexed="2.00" uops_indexed="4" ports_indexed="2*p0+1*p23+1*p5" TP_ports_indexed="2.00"/>
        <IACA version="3.0" TP="2.00" uops="4" ports="2*p0+1*p23+1*p5" TP_ports="2.00"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="7"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="13" cycles_addr_is_upper_bound="1" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP01+2*FP12" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP01+1*FP123+1*FP23" TP_ports="0.75">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="12" cycles_addr_is_upper_bound="1" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQB" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQB" iform="VPCMPEQB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPEQB (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPCMPEQB_XMM_XMM_M128.html" summary="Compare Packed Data for Equal" url-ref="felixcloutier.com/x86/PCMPEQB:PCMPEQW:PCMPEQD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQB" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQB" iform="VPCMPEQB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPEQB (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPCMPEQB_XMM_XMM_XMM.html" summary="Compare Packed Data for Equal" url-ref="felixcloutier.com/x86/PCMPEQB:PCMPEQW:PCMPEQD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQD" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQD" iform="VPCMPEQD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPEQD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPCMPEQD_XMM_XMM_M128.html" summary="Compare Packed Data for Equal" url-ref="felixcloutier.com/x86/PCMPEQB:PCMPEQW:PCMPEQD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQD" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQD" iform="VPCMPEQD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPEQD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPCMPEQD_XMM_XMM_XMM.html" summary="Compare Packed Data for Equal" url-ref="felixcloutier.com/x86/PCMPEQB:PCMPEQW:PCMPEQD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_unrolled="0.33" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQQ" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQQ" iform="VPCMPEQQ_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPEQQ (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPCMPEQQ_XMM_XMM_M128.html" summary="Compare Packed Qword Data for Equal" url-ref="felixcloutier.com/x86/PCMPEQQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQQ" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQQ" iform="VPCMPEQQ_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPEQQ (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPCMPEQQ_XMM_XMM_XMM.html" summary="Compare Packed Qword Data for Equal" url-ref="felixcloutier.com/x86/PCMPEQQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP03" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQW" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQW" iform="VPCMPEQW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPEQW (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPCMPEQW_XMM_XMM_M128.html" summary="Compare Packed Data for Equal" url-ref="felixcloutier.com/x86/PCMPEQB:PCMPEQW:PCMPEQD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u16"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQW" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQW" iform="VPCMPEQW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPEQW (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPCMPEQW_XMM_XMM_XMM.html" summary="Compare Packed Data for Equal" url-ref="felixcloutier.com/x86/PCMPEQB:PCMPEQW:PCMPEQD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRI" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRI" iform="VPCMPESTRI_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPCMPESTRI (XMM, M128, I8)" vex="1" url="uops.info/html-instr/VPCMPESTRI_XMM_M128_I8.html" summary="Packed Compare Explicit Length Strings, Return Index" url-ref="felixcloutier.com/x86/PCMPESTRI.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG1" r="1" suppressed="1" type="reg">EAX</operand>
      <operand idx="5" name="REG2" r="1" suppressed="1" type="reg">EDX</operand>
      <operand idx="6" name="REG3" suppressed="1" type="reg" w="1">ECX</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG4" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_mem="29" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="18" cycles_addr_index="18" start_op="2" target_op="7"/>
          <latency cycles="17" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="17" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_mem="28" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="18" cycles_addr_index="18" start_op="2" target_op="7"/>
          <latency cycles="17" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="17" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.2" TP="4.00" fusion_occurred="1" TP_no_interiteration="4.00" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="9" ports_indexed="4*p0+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="9" ports_indexed="4*p0+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.73" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_mem="26" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="4.00" fusion_occurred="1" TP_no_interiteration="4.00" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="9" ports_indexed="4*p0+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="9" ports_indexed="4*p0+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.72" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+1*p23+2*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_index="17" cycles_mem="28" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="17" cycles_addr_index="17"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="9" ports_indexed="4*p0+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.73" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+1*p23+2*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="9" ports_indexed="4*p0+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.73" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+1*p23+2*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+1*p23+2*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+1*p23+2*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="3.96" TP_loop="4.00" uops="8" ports="3*p0+1*p015+1*p06+1*p1+1*p23A+1*p5" TP_ports="3.00" uops_retire_slots_indexed="8" uops_MITE_indexed="4" uops_MS_indexed="4" complex_decoder_indexed="1" available_simple_decoders_indexed="0" TP_unrolled_indexed="3.98" TP_loop_indexed="4.00" uops_indexed="8" ports_indexed="3*p0+1*p06+1*p1+1*p23A" TP_ports_indexed="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="31" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="12" complex_decoder="1" TP_unrolled="13.94" TP_loop="12.58" uops="12">
          <latency start_op="1" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="18" cycles_addr_index="18" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="20" cycles_addr_index="20"/>
          <latency start_op="4" target_op="6" cycles="17"/>
          <latency start_op="4" target_op="7" cycles="19"/>
          <latency start_op="5" target_op="6" cycles="18"/>
          <latency start_op="5" target_op="7" cycles="20"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="12">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="13" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="12">
          <latency start_op="1" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="14" cycles_addr_index="14" cycles_mem="23" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="14" cycles_addr_index="14"/>
          <latency start_op="4" target_op="6" cycles="13"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="12"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="12" ports="1*FP0123+1*FP1+1*FP3+2*FP45" TP_ports="1.00">
          <latency start_op="1" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="14" cycles_addr_index="14" cycles_mem="27" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="14" cycles_addr_index="14"/>
          <latency start_op="4" target_op="6" cycles="14"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="13"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="12" ports="1*FP01+2*FP1+2*FP45" TP_ports="2.00">
          <latency start_op="1" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="14" cycles_addr_index="14" cycles_mem="29" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="14" cycles_addr_index="14"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="13"/>
          <latency start_op="5" target_op="7" cycles="13"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRI" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRI" iform="VPCMPESTRI_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPCMPESTRI (XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VPCMPESTRI_XMM_XMM_I8.html" summary="Packed Compare Explicit Length Strings, Return Index" url-ref="felixcloutier.com/x86/PCMPESTRI.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG2" r="1" suppressed="1" type="reg">EAX</operand>
      <operand idx="5" name="REG3" r="1" suppressed="1" type="reg">EDX</operand>
      <operand idx="6" name="REG4" suppressed="1" type="reg" w="1">ECX</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG5" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="4.00" TP_ports="3.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="17" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="17" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="4.00" TP_ports="3.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="17" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="17" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.2" TP="4.00" TP_no_interiteration="4.00" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="2.3" TP="4.05" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.62" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="4.00" TP_no_interiteration="4.00" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="2.3" TP="4.05" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.62" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="4.05" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.68" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="4.05" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.68" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+2*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+2*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+2*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+2*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+2*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="11" complex_decoder="1" TP_unrolled="12.00" TP_loop="11.03" uops="12">
          <latency start_op="1" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="18" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="19" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="17"/>
          <latency start_op="4" target_op="7" cycles="19"/>
          <latency start_op="5" target_op="6" cycles="18"/>
          <latency start_op="5" target_op="7" cycles="20"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="6">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="13" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="6">
          <latency start_op="1" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="13"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="12"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="8" ports="1*FP0123+1*FP1+1*FP3+2*FP45" TP_ports="1.00">
          <latency start_op="1" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="14"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="12"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="8" ports="1*FP01+2*FP1+2*FP45" TP_ports="2.00">
          <latency start_op="1" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="13"/>
          <latency start_op="5" target_op="7" cycles="13"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRIQ" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRI64" iform="VPCMPESTRI64_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPCMPESTRI64 (XMM, M128, I8)" vex="1" url="uops.info/html-instr/VPCMPESTRI64_XMM_M128_I8.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG1" r="1" suppressed="1" type="reg">RAX</operand>
      <operand idx="5" name="REG2" r="1" suppressed="1" type="reg">RDX</operand>
      <operand idx="6" name="REG3" suppressed="1" type="reg" w="1">RCX</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG4" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="50.00" TP_ports="12.00" TP_unrolled="50.00" available_simple_decoders="0" complex_decoder="1" ports="12*p0+7*p1+2*p15+12*p5" uops="32" uops_MITE="0" uops_MS="32" uops_retire_slots="32">
          <latency cycles="50" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="50" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="49" cycles_addr_index="49" cycles_mem="50" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="50" cycles_addr_index="50" start_op="2" target_op="7"/>
          <latency cycles="49" start_op="4" target_op="6"/>
          <latency cycles="50" start_op="4" target_op="7"/>
          <latency cycles="49" start_op="5" target_op="6"/>
          <latency cycles="50" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="46.00" TP_ports="13.00" TP_unrolled="46.00" available_simple_decoders="0" complex_decoder="1" ports="11*p0+8*p1+1*p23+13*p5" uops="33" uops_MITE="0" uops_MS="33" uops_retire_slots="33">
          <latency cycles="46" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="46" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="47" cycles_addr_index="47" cycles_mem="47" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="46" cycles_addr_index="46" start_op="2" target_op="7"/>
          <latency cycles="47" start_op="4" target_op="6"/>
          <latency cycles="47" start_op="4" target_op="7"/>
          <latency cycles="47" start_op="5" target_op="6"/>
          <latency cycles="47" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.2" TP="4.00" fusion_occurred="1" TP_no_interiteration="4.00" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="9" ports_indexed="4*p0+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="9" ports_indexed="4*p0+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.73" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_mem="26" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="4.00" fusion_occurred="1" TP_no_interiteration="4.00" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="9" ports_indexed="4*p0+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="9" ports_indexed="4*p0+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.72" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+1*p23+2*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_index="17" cycles_mem="28" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="17" cycles_addr_index="17"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="9" ports_indexed="4*p0+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.73" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+1*p23+2*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="9" ports_indexed="4*p0+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.73" uops="9" ports="4*p0+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+1*p23+2*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+1*p23+2*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+1*p23+2*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p015+1*p06+1*p1+1*p23A+1*p5" TP_ports="3.00" uops_retire_slots_indexed="8" uops_MITE_indexed="4" uops_MS_indexed="4" complex_decoder_indexed="1" available_simple_decoders_indexed="0" TP_unrolled_indexed="3.98" TP_loop_indexed="4.00" uops_indexed="8" ports_indexed="3*p0+1*p06+1*p1+1*p23A" TP_ports_indexed="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_index="16" cycles_mem="31" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="12" complex_decoder="1" TP_unrolled="13.95" TP_loop="12.67" uops="12">
          <latency start_op="1" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="18" cycles_addr_index="18" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="20" cycles_addr_index="20"/>
          <latency start_op="4" target_op="6" cycles="17"/>
          <latency start_op="4" target_op="7" cycles="19"/>
          <latency start_op="5" target_op="6" cycles="18"/>
          <latency start_op="5" target_op="7" cycles="20"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="7.00" TP_unrolled="7.00" uops="10">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_mem="26" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="13" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="12">
          <latency start_op="1" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="14" cycles_addr_index="14" cycles_mem="23" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="14" cycles_addr_index="14"/>
          <latency start_op="4" target_op="6" cycles="13"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="12"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="12" ports="1*FP0123+1*FP1+1*FP3+2*FP45" TP_ports="1.00">
          <latency start_op="1" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="14" cycles_addr_index="14" cycles_mem="27" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="14" cycles_addr_index="14"/>
          <latency start_op="4" target_op="6" cycles="14"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="12"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="12" ports="1*FP01+2*FP1+2*FP45" TP_ports="2.00">
          <latency start_op="1" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="14" cycles_addr_index="14" cycles_mem="29" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="14" cycles_addr_index="14"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="13"/>
          <latency start_op="5" target_op="7" cycles="13"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRIQ" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRI64" iform="VPCMPESTRI64_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPCMPESTRI64 (XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VPCMPESTRI64_XMM_XMM_I8.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG2" r="1" suppressed="1" type="reg">RAX</operand>
      <operand idx="5" name="REG3" r="1" suppressed="1" type="reg">RDX</operand>
      <operand idx="6" name="REG4" suppressed="1" type="reg" w="1">RCX</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG5" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="93.31" TP_ports="19.00" TP_unrolled="93.25" available_simple_decoders="0" complex_decoder="1" ports="17*p0+10*p1+1*p15+19*p5" uops="33" uops_MITE="0" uops_MS="50" uops_retire_slots="33">
          <latency cycles="94" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="94" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="93" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="93" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="94" start_op="4" target_op="6"/>
          <latency cycles="94" start_op="4" target_op="7"/>
          <latency cycles="94" start_op="5" target_op="6"/>
          <latency cycles="94" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="92.31" TP_ports="21.00" TP_unrolled="92.25" available_simple_decoders="0" complex_decoder="1" ports="15*p0+12*p1+1*p23+21*p5" uops="49" uops_MITE="0" uops_MS="51" uops_retire_slots="33">
          <latency cycles="93" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="93" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="92" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="92" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="92" start_op="4" target_op="6"/>
          <latency cycles="92" start_op="4" target_op="7"/>
          <latency cycles="92" start_op="5" target_op="6"/>
          <latency cycles="92" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.2" TP="4.00" TP_no_interiteration="4.00" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="2.3" TP="4.05" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.62" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="4.00" TP_no_interiteration="4.00" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="2.3" TP="4.05" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.62" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="4.05" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.68" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="4.05" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.68" uops="8" ports="4*p0+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+2*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+1*p06+1*p1+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+2*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+2*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+2*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="8" uops_MITE="4" uops_MS="4" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.00" TP_loop="4.00" uops="8" ports="3*p0+2*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="11" complex_decoder="1" TP_unrolled="12.00" TP_loop="11.00" uops="12">
          <latency start_op="1" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="18" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="19" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="17"/>
          <latency start_op="4" target_op="7" cycles="19"/>
          <latency start_op="5" target_op="6" cycles="18"/>
          <latency start_op="5" target_op="7" cycles="20"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="8">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="13" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="6">
          <latency start_op="1" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="13"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="12"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="8" ports="1*FP0123+1*FP1+1*FP3+2*FP45" TP_ports="1.00">
          <latency start_op="1" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="14"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="12"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="8" ports="1*FP01+2*FP1+2*FP45" TP_ports="2.00">
          <latency start_op="1" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="15"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="13"/>
          <latency start_op="5" target_op="7" cycles="13"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRM" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRM" iform="VPCMPESTRM_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPCMPESTRM (XMM, M128, I8)" vex="1" url="uops.info/html-instr/VPCMPESTRM_XMM_M128_I8.html" summary="Packed Compare Explicit Length Strings, Return Mask" url-ref="felixcloutier.com/x86/PCMPESTRM.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG1" r="1" suppressed="1" type="reg">EAX</operand>
      <operand idx="5" name="REG2" r="1" suppressed="1" type="reg">EDX</operand>
      <operand idx="6" name="REG3" suppressed="1" type="reg" w="1" width="128" xtype="i32">XMM0</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG4" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="16" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="18" cycles_addr_index="18" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="16" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="18" cycles_addr_index="18" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.2" TP="4.00" fusion_occurred="1" TP_no_interiteration="4.00" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="10" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="10" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.66" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="16" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="4.00" fusion_occurred="1" TP_no_interiteration="4.00" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="10" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="10" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.66" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+1*p23+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="16" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="17" cycles_addr_index="17"/>
          <latency start_op="4" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="10" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.69" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+1*p23+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="9"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="10" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.69" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+1*p23+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="9"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="9" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+1*p23+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="9"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+1*p23+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="9"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="17" cycles_addr_index="17"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="17" cycles_addr_index="17"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="17" cycles_addr_index="17"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.98" TP_loop="5.00" uops="9" ports="3*p0+2*p015+1*p06+1*p1+1*p23A+1*p5" TP_ports="3.00" uops_retire_slots_indexed="9" uops_MITE_indexed="4" uops_MS_indexed="5" complex_decoder_indexed="1" available_simple_decoders_indexed="0" TP_unrolled_indexed="5.00" TP_loop_indexed="5.00" uops_indexed="9" ports_indexed="3*p0+1*p06+1*p1+1*p15+1*p23A" TP_ports_indexed="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="11" complex_decoder="1" TP_unrolled="11.94" TP_loop="10.88" uops="11">
          <latency start_op="1" target_op="6" cycles="6"/>
          <latency start_op="1" target_op="7" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="20" cycles_addr_index="20"/>
          <latency start_op="4" target_op="6" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="19"/>
          <latency start_op="5" target_op="6" cycles="18" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="20"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="12">
          <latency cycles="7" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="12">
          <latency start_op="1" target_op="6" cycles="7"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="14" cycles_addr_index="14"/>
          <latency start_op="4" target_op="6" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="12" ports="2*FP0123+1*FP1+1*FP3+1*FP45" TP_ports="1.00">
          <latency start_op="1" target_op="6" cycles="6"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="14" cycles_addr_index="14"/>
          <latency start_op="4" target_op="6" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="12" ports="3*FP01+1*FP12+1*FP45" TP_ports="1.50">
          <latency start_op="1" target_op="6" cycles="8"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="14" cycles_addr_index="14"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="13"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRM" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRM" iform="VPCMPESTRM_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPCMPESTRM (XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VPCMPESTRM_XMM_XMM_I8.html" summary="Packed Compare Explicit Length Strings, Return Mask" url-ref="felixcloutier.com/x86/PCMPESTRM.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG2" r="1" suppressed="1" type="reg">EAX</operand>
      <operand idx="5" name="REG3" r="1" suppressed="1" type="reg">EDX</operand>
      <operand idx="6" name="REG4" suppressed="1" type="reg" w="1" width="128" xtype="i32">XMM0</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG5" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="4.00" TP_ports="3.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="4.00" TP_ports="3.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.2" TP="4.00" TP_no_interiteration="4.00" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="2.3" TP="4.05" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.57" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="4.00" TP_no_interiteration="4.00" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="2.3" TP="4.05" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.55" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+4*p5" TP_ports="4.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="4.05" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.47" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+4*p5" TP_ports="4.00">
          <latency start_op="1" target_op="6" cycles="10"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="9"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="4.05" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.47" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+4*p5" TP_ports="4.00">
          <latency start_op="1" target_op="6" cycles="10"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="9"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="9" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+4*p5" TP_ports="4.00">
          <latency start_op="1" target_op="6" cycles="10"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="9"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+3*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+4*p5" TP_ports="4.00">
          <latency start_op="1" target_op="6" cycles="10"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="9"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+3*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+3*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+3*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.93" TP_loop="5.00" uops="9" ports="3*p0+3*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="10" complex_decoder="1" TP_unrolled="9.97" TP_loop="9.09" uops="11">
          <latency start_op="1" target_op="6" cycles="6"/>
          <latency start_op="1" target_op="7" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="9"/>
          <latency start_op="2" target_op="7" cycles="19" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="19"/>
          <latency start_op="5" target_op="6" cycles="18" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="20"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="7">
          <latency cycles="7" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="7" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="7">
          <latency start_op="1" target_op="6" cycles="7"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="7"/>
          <latency start_op="2" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="7" ports="3*FP0123+1*FP1+1*FP45" TP_ports="1.00">
          <latency start_op="1" target_op="6" cycles="6"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="6"/>
          <latency start_op="2" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="7" ports="2*FP01+1*FP0123+1*FP1+1*FP45" TP_ports="1.50">
          <latency start_op="1" target_op="6" cycles="7"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="7"/>
          <latency start_op="2" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="13"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRMQ" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRM64" iform="VPCMPESTRM64_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPCMPESTRM64 (XMM, M128, I8)" vex="1" url="uops.info/html-instr/VPCMPESTRM64_XMM_M128_I8.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG1" r="1" suppressed="1" type="reg">RAX</operand>
      <operand idx="5" name="REG2" r="1" suppressed="1" type="reg">RDX</operand>
      <operand idx="6" name="REG3" suppressed="1" type="reg" w="1" width="128" xtype="i32">XMM0</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG4" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="50.00" TP_ports="12.00" TP_unrolled="50.00" available_simple_decoders="0" complex_decoder="1" ports="12*p0+7*p1+2*p15+12*p5" uops="32" uops_MITE="0" uops_MS="32" uops_retire_slots="32">
          <latency cycles="50" start_op="1" target_op="6"/>
          <latency cycles="50" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="50" cycles_addr_index="50" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="47" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="50" cycles_addr_index="50" start_op="2" target_op="7"/>
          <latency cycles="50" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="50" start_op="4" target_op="7"/>
          <latency cycles="50" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="50" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="46.00" TP_ports="13.00" TP_unrolled="46.00" available_simple_decoders="0" complex_decoder="1" ports="11*p0+8*p1+1*p23+13*p5" uops="33" uops_MITE="0" uops_MS="33" uops_retire_slots="33">
          <latency cycles="48" start_op="1" target_op="6"/>
          <latency cycles="46" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="47" cycles_addr_index="48" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="46" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="46" cycles_addr_index="46" start_op="2" target_op="7"/>
          <latency cycles="46" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="47" start_op="4" target_op="7"/>
          <latency cycles="46" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="47" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.2" TP="4.00" fusion_occurred="1" TP_no_interiteration="4.00" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="10" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="10" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.66" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="16" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="4.00" fusion_occurred="1" TP_no_interiteration="4.00" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="10" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="10" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.66" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+1*p23+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="16" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="17" cycles_addr_index="17"/>
          <latency start_op="4" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="10" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.69" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+1*p23+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="9"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="4.00" fusion_occurred="1" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00" TP_indexed="4.00" uops_indexed="10" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports_indexed="4.00"/>
        <IACA version="3.0" TP="3.69" uops="10" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+1*p23+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="9"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="9" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+1*p23+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="9"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+1*p23+3*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="9"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="17" cycles_addr_index="17"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="17" cycles_addr_index="17"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="17" cycles_addr_index="17"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.98" TP_loop="5.00" uops="9" ports="3*p0+2*p015+1*p06+1*p1+1*p23A+1*p5" TP_ports="3.00" uops_retire_slots_indexed="9" uops_MITE_indexed="4" uops_MS_indexed="5" complex_decoder_indexed="1" available_simple_decoders_indexed="0" TP_unrolled_indexed="5.00" TP_loop_indexed="5.00" uops_indexed="9" ports_indexed="3*p0+1*p06+1*p1+1*p15+1*p23A" TP_ports_indexed="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="16" cycles_addr_index="16"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="11" complex_decoder="1" TP_unrolled="11.92" TP_loop="10.93" uops="11">
          <latency start_op="1" target_op="6" cycles="7"/>
          <latency start_op="1" target_op="7" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="17" cycles_addr_is_upper_bound="1" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="20" cycles_addr_index="20"/>
          <latency start_op="4" target_op="6" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="19"/>
          <latency start_op="5" target_op="6" cycles="18" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="20"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="7.00" TP_unrolled="7.00" uops="10">
          <latency cycles="7" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="12">
          <latency start_op="1" target_op="6" cycles="7"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="14" cycles_addr_index="14"/>
          <latency start_op="4" target_op="6" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="12" ports="2*FP0123+1*FP1+1*FP3+1*FP45" TP_ports="1.00">
          <latency start_op="1" target_op="6" cycles="6"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="14" cycles_addr_index="14"/>
          <latency start_op="4" target_op="6" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="4.00" TP_loop="4.00" uops="12" ports="3*FP01+1*FP12+1*FP45" TP_ports="1.50">
          <latency start_op="1" target_op="6" cycles="8"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="7" cycles_addr="14" cycles_addr_index="14"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="13"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRMQ" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRM64" iform="VPCMPESTRM64_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPCMPESTRM64 (XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VPCMPESTRM64_XMM_XMM_I8.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG2" r="1" suppressed="1" type="reg">RAX</operand>
      <operand idx="5" name="REG3" r="1" suppressed="1" type="reg">RDX</operand>
      <operand idx="6" name="REG4" suppressed="1" type="reg" w="1" width="128" xtype="i32">XMM0</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG5" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="93.31" TP_ports="19.00" TP_unrolled="93.25" available_simple_decoders="0" complex_decoder="1" ports="17*p0+10*p1+1*p15+19*p5" uops="33" uops_MITE="0" uops_MS="50" uops_retire_slots="33">
          <latency cycles="95" start_op="1" target_op="6"/>
          <latency cycles="94" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="94" start_op="2" target_op="6"/>
          <latency cycles="93" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="93" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="94" start_op="4" target_op="7"/>
          <latency cycles="93" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="94" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="92.31" TP_ports="21.00" TP_unrolled="92.25" available_simple_decoders="0" complex_decoder="1" ports="15*p0+12*p1+1*p23+21*p5" uops="49" uops_MITE="0" uops_MS="51" uops_retire_slots="33">
          <latency cycles="93" start_op="1" target_op="6"/>
          <latency cycles="91" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="93" start_op="2" target_op="6"/>
          <latency cycles="92" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="92" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="92" start_op="4" target_op="7"/>
          <latency cycles="92" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="93" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.2" TP="4.00" TP_no_interiteration="4.00" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="2.3" TP="4.05" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.57" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="4.00" TP_no_interiteration="4.00" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="2.3" TP="4.05" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.55" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+4*p5" TP_ports="4.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="15" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="15"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="4.05" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.47" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+4*p5" TP_ports="4.00">
          <latency start_op="1" target_op="6" cycles="10"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="9"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="4.05" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <IACA version="3.0" TP="3.47" uops="9" ports="4*p0+1*p015+1*p0156+3*p5" TP_ports="4.00"/>
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+4*p5" TP_ports="4.00">
          <latency start_op="1" target_op="6" cycles="10"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="9"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="9" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+4*p5" TP_ports="4.00">
          <latency start_op="1" target_op="6" cycles="10"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="9"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+3*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+1*p06+1*p1+4*p5" TP_ports="4.00">
          <latency start_op="1" target_op="6" cycles="10"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="9"/>
          <latency start_op="2" target_op="7" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+3*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+3*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="5.00" TP_loop="5.00" uops="9" ports="3*p0+3*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="9" uops_MITE="4" uops_MS="5" complex_decoder="1" available_simple_decoders="0" TP_unrolled="4.93" TP_loop="5.00" uops="9" ports="3*p0+3*p015+1*p06+1*p1+1*p5" TP_ports="3.00">
          <latency start_op="1" target_op="6" cycles="11"/>
          <latency start_op="1" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="11"/>
          <latency start_op="2" target_op="7" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="16"/>
          <latency start_op="5" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="16"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="10" complex_decoder="1" TP_unrolled="9.98" TP_loop="9.12" uops="11">
          <latency start_op="1" target_op="6" cycles="6"/>
          <latency start_op="1" target_op="7" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="9"/>
          <latency start_op="2" target_op="7" cycles="19" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="19"/>
          <latency start_op="5" target_op="6" cycles="18" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="20"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="8">
          <latency cycles="7" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="7" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="7">
          <latency start_op="1" target_op="6" cycles="7"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="7"/>
          <latency start_op="2" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="7" ports="3*FP0123+1*FP1+1*FP45" TP_ports="1.00">
          <latency start_op="1" target_op="6" cycles="6"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="6"/>
          <latency start_op="2" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="13" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="13"/>
          <latency start_op="5" target_op="6" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="12"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="7" ports="2*FP01+1*FP0123+1*FP1+1*FP45" TP_ports="1.50">
          <latency start_op="1" target_op="6" cycles="7"/>
          <latency start_op="1" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="6" cycles="8"/>
          <latency start_op="2" target_op="7" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="6" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="4" target_op="7" cycles="15"/>
          <latency start_op="5" target_op="6" cycles="14" cycles_is_upper_bound="1"/>
          <latency start_op="5" target_op="7" cycles="13"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTB" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTB" iform="VPCMPGTB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPGTB (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPCMPGTB_XMM_XMM_M128.html" summary="Compare Packed Signed Integers for Greater Than" url-ref="felixcloutier.com/x86/PCMPGTB:PCMPGTW:PCMPGTD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTB" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTB" iform="VPCMPGTB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPGTB (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPCMPGTB_XMM_XMM_XMM.html" summary="Compare Packed Signed Integers for Greater Than" url-ref="felixcloutier.com/x86/PCMPGTB:PCMPGTW:PCMPGTD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="0"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="0"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="0"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.20" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.20" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.20" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.17" TP_loop_same_reg="0.17" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_unrolled="0.33" uops="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP013" TP_ports="0.33" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTD" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTD" iform="VPCMPGTD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPGTD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPCMPGTD_XMM_XMM_M128.html" summary="Compare Packed Signed Integers for Greater Than" url-ref="felixcloutier.com/x86/PCMPGTB:PCMPGTW:PCMPGTD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTD" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTD" iform="VPCMPGTD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPGTD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPCMPGTD_XMM_XMM_XMM.html" summary="Compare Packed Signed Integers for Greater Than" url-ref="felixcloutier.com/x86/PCMPGTB:PCMPGTW:PCMPGTD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="0"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="0"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="0"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.20" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.20" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.20" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.17" TP_loop_same_reg="0.17" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_unrolled="0.33" uops="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP013" TP_ports="0.33" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTQ" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTQ" iform="VPCMPGTQ_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPGTQ (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPCMPGTQ_XMM_XMM_M128.html" summary="Compare Packed Data for Greater Than" url-ref="felixcloutier.com/x86/PCMPGTQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="0" available_simple_decoders_indexed="0" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="0" available_simple_decoders_indexed="0" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="11" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="0" available_simple_decoders_indexed="0" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p0+1*p23" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p0+1*p23" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p0+1*p23" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0" latency="9.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="4"/>
          <latency start_op="3" target_op="1" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP0" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTQ" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTQ" iform="VPCMPGTQ_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPGTQ (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPCMPGTQ_XMM_XMM_XMM.html" summary="Compare Packed Data for Greater Than" url-ref="felixcloutier.com/x86/PCMPGTQ.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00" TP_same_reg="0.25" uops_same_reg="0"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement TP_loop="1.00" TP_loop_same_reg="0.29" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="1.00" available_simple_decoders="0" available_simple_decoders_same_reg="0" complex_decoder="1" complex_decoder_same_reg="1" ports="1*p0" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="5" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="5" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00" TP_same_reg="0.25" uops_same_reg="0"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement TP_loop="1.00" TP_loop_same_reg="0.25" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="1.00" available_simple_decoders="0" available_simple_decoders_same_reg="0" complex_decoder="1" complex_decoder_same_reg="1" ports="1*p0" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="5" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="5" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="5" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00" TP_same_reg="0.25" uops_same_reg="0"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p0" TP_ports="1.00" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement TP_loop="1.00" TP_loop_same_reg="0.25" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="1.00" available_simple_decoders="0" available_simple_decoders_same_reg="0" complex_decoder="1" complex_decoder_same_reg="1" ports="1*p0" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="5" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="5" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p0" TP_ports="1.00" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p0" TP_ports="1.00" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.99" uops="1" ports="1*p0" TP_ports="1.00" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p0" TP_ports="1.00" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.31" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="5" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="5" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.31" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="3" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="3" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.31" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="3" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="3" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_same_reg="0.25" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="0.31" ports="1*p5" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="3" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="3" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.31" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="3" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="3" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.31" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="3" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="3" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.31" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="3" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="3" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.31" TP_loop_same_reg="0.20" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="3" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="3" cycles_same_reg="0"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.31" TP_loop_same_reg="0.20" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="3" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="3" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.31" TP_loop_same_reg="0.20" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="3" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="3" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.17" TP_loop_same_reg="0.17" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="3" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="3" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1">
          <latency start_op="2" target_op="1" cycles="4" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="4" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_loop_same_reg="0.38" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="0.31" ports="1*FP0" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP0" TP_ports="1.00" TP_unrolled_same_reg="0.31" TP_loop_same_reg="0.33" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.31" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.31" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50" TP_unrolled_same_reg="0.31" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="2" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="2" cycles_same_reg="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTW" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTW" iform="VPCMPGTW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPGTW (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPCMPGTW_XMM_XMM_M128.html" summary="Compare Packed Signed Integers for Greater Than" url-ref="felixcloutier.com/x86/PCMPGTB:PCMPGTW:PCMPGTD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" fusion_occurred="1" latency="7" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" fusion_occurred="1" TP_no_interiteration="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p15+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p15+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p15+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <IACA version="3.0" TP="0.50" fusion_occurred="1" uops="2" ports="1*p01+1*p23" TP_ports="0.50" TP_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="2" ports="1*p01+1*p23A" TP_ports="0.50" uops_retire_slots_indexed="2" uops_MITE_indexed="1" uops_MS_indexed="0" TP_unrolled_indexed="0.50" TP_loop_indexed="0.50" uops_indexed="2" ports_indexed="1*p01+1*p23A" TP_ports_indexed="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP013" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP0123" TP_ports="0.25">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTW" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTW" iform="VPCMPGTW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPGTW (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPCMPGTW_XMM_XMM_XMM.html" summary="Compare Packed Signed Integers for Greater Than" url-ref="felixcloutier.com/x86/PCMPGTB:PCMPGTW:PCMPGTD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="0"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="0"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="0.50" latency="1" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="0"/>
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="0.50" TP_no_interiteration="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.25" uops_same_reg="1"/>
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p15" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p15" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="0.50" uops="1" ports="1*p01" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <IACA version="3.0" TP="0.49" uops="1" ports="1*p01" TP_ports="0.50" TP_same_reg="0.24" uops_same_reg="1"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.20" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.20" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.20" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*p01" TP_ports="0.50" uops_retire_slots_same_reg="1" uops_MITE_same_reg="1" uops_MS_same_reg="0" TP_unrolled_same_reg="0.17" TP_loop_same_reg="0.17" uops_same_reg="0">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_loop_same_reg="0.25" TP_ports="0.33" TP_unrolled="0.33" TP_unrolled_same_reg="0.25" ports="1*FP013" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.33" TP_loop="0.33" uops="1" ports="1*FP013" TP_ports="0.33" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.25" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FP0/3" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
        <doc uops="1" ports="FP0/1/2/3" latency="1" TP="0.25"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.25" TP_loop="0.25" uops="1" ports="1*FP0123" TP_ports="0.25" TP_unrolled_same_reg="0.25" TP_loop_same_reg="0.17" uops_same_reg="1">
          <latency start_op="2" target_op="1" cycles="1" cycles_same_reg="0"/>
          <latency start_op="3" target_op="1" cycles="1" cycles_same_reg="0"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPISTRI" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPISTRI" iform="VPCMPISTRI_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPCMPISTRI (XMM, M128, I8)" vex="1" url="uops.info/html-instr/VPCMPISTRI_XMM_M128_I8.html" summary="Packed Compare Implicit Length Strings, Return Index" url-ref="felixcloutier.com/x86/PCMPISTRI.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG1" suppressed="1" type="reg" w="1">ECX</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="5" name="REG2" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="3.00" latency="17" TP_no_interiteration="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <IACA version="2.2" TP="3.00" TP_no_interiteration="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <IACA version="2.3" TP="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_mem="28" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="3.00" latency="17" TP_no_interiteration="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <IACA version="2.2" TP="3.00" TP_no_interiteration="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <IACA version="2.3" TP="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_mem="27" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="17" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p1+1*p23" TP_ports="2.00"/>
        <IACA version="2.2" TP="3.00" TP_no_interiteration="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <IACA version="2.3" TP="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <IACA version="3.0" TP="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_mem="26" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="3.00" TP_no_interiteration="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <IACA version="2.3" TP="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <IACA version="3.0" TP="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="17" cycles_addr_index="17" cycles_mem="28" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="17" cycles_addr_index="17"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <IACA version="3.0" TP="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <IACA version="3.0" TP="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23A" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_index="16" cycles_mem="31" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="8" complex_decoder="1" TP_unrolled="13.00" TP_loop="12.67" uops="8">
          <latency start_op="1" target_op="4" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="17" cycles_addr_index="17" cycles_mem="24" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="19" cycles_addr_index="19"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="3">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3">
          <latency start_op="1" target_op="4" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="14" cycles_addr_index="14" cycles_mem="23" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="14" cycles_addr_index="14"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP1+2*FP45" TP_ports="1.00">
          <latency start_op="1" target_op="4" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="14" cycles_addr_index="14" cycles_mem="27" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="14" cycles_addr_index="14"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="1*FP1+2*FP45" TP_ports="1.00">
          <latency start_op="1" target_op="4" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="14" cycles_addr_index="14" cycles_mem="29" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="14" cycles_addr_index="14"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPISTRI" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPISTRI" iform="VPCMPISTRI_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPCMPISTRI (XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VPCMPISTRI_XMM_XMM_I8.html" summary="Packed Compare Implicit Length Strings, Return Index" url-ref="felixcloutier.com/x86/PCMPISTRI.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG2" suppressed="1" type="reg" w="1">ECX</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="5" name="REG3" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="3.00" latency="11" TP_no_interiteration="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="2.2" TP="3.00" TP_no_interiteration="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="2.3" TP="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="3.00" latency="11" TP_no_interiteration="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="2.2" TP="3.00" TP_no_interiteration="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="2.3" TP="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="11" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p1" TP_ports="2.00"/>
        <IACA version="2.2" TP="3.00" TP_no_interiteration="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="2.3" TP="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="3.0" TP="2.92" uops="3" ports="3*p0" TP_ports="3.00"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="3.00" TP_no_interiteration="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="2.3" TP="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="3.0" TP="2.91" uops="3" ports="3*p0" TP_ports="3.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="3.0" TP="2.92" uops="3" ports="3*p0" TP_ports="3.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="3.0" TP="2.92" uops="3" ports="3*p0" TP_ports="3.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
        <doc TP="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="7" complex_decoder="1" TP_unrolled="11.00" TP_loop="10.80" uops="8">
          <latency start_op="1" target_op="4" cycles="16" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles="18" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
        <doc uops="ucode" latency="2" TP="2.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="1" target_op="4" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP01+2*FP45" TP_ports="1.00">
          <latency start_op="1" target_op="4" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP01+2*FP45" TP_ports="1.00">
          <latency start_op="1" target_op="4" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="1" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPISTRM" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPISTRM" iform="VPCMPISTRM_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPCMPISTRM (XMM, M128, I8)" vex="1" url="uops.info/html-instr/VPCMPISTRM_XMM_M128_I8.html" summary="Packed Compare Implicit Length Strings, Return Mask" url-ref="felixcloutier.com/x86/PCMPISTRM.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG1" suppressed="1" type="reg" w="1" width="128" xtype="i32">XMM0</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="5" name="REG2" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="3.00" latency="17" TP_no_interiteration="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <IACA version="2.2" TP="3.00" fusion_occurred="1" TP_no_interiteration="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00" TP_indexed="3.00" uops_indexed="4" ports_indexed="3*p0+1*p23" TP_ports_indexed="3.00"/>
        <IACA version="2.3" TP="3.00" fusion_occurred="1" uops="4" ports="3*p0+1*p23" TP_ports="3.00" TP_indexed="3.00" uops_indexed="4" ports_indexed="3*p0+1*p23" TP_ports_indexed="3.00"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="10" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="3.00" latency="17" TP_no_interiteration="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <IACA version="2.2" TP="3.00" fusion_occurred="1" TP_no_interiteration="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00" TP_indexed="3.00" uops_indexed="4" ports_indexed="3*p0+1*p23" TP_ports_indexed="3.00"/>
        <IACA version="2.3" TP="3.00" fusion_occurred="1" uops="4" ports="3*p0+1*p23" TP_ports="3.00" TP_indexed="3.00" uops_indexed="4" ports_indexed="3*p0+1*p23" TP_ports_indexed="3.00"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="10" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="17" TP_no_interiteration="2.00" uops="4" ports="2*p0+1*p1+1*p23" TP_ports="2.00"/>
        <IACA version="2.2" TP="3.00" fusion_occurred="1" TP_no_interiteration="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00" TP_indexed="3.00" uops_indexed="4" ports_indexed="3*p0+1*p23" TP_ports_indexed="3.00"/>
        <IACA version="2.3" TP="3.00" fusion_occurred="1" uops="4" ports="3*p0+1*p23" TP_ports="3.00" TP_indexed="3.00" uops_indexed="4" ports_indexed="3*p0+1*p23" TP_ports_indexed="3.00"/>
        <IACA version="3.0" TP="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="10" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="3.00" fusion_occurred="1" TP_no_interiteration="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00" TP_indexed="3.00" uops_indexed="4" ports_indexed="3*p0+1*p23" TP_ports_indexed="3.00"/>
        <IACA version="2.3" TP="3.00" fusion_occurred="1" uops="4" ports="3*p0+1*p23" TP_ports="3.00" TP_indexed="3.00" uops_indexed="4" ports_indexed="3*p0+1*p23" TP_ports_indexed="3.00"/>
        <IACA version="3.0" TP="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="10"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="17" cycles_addr_index="17"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="3.00" fusion_occurred="1" uops="4" ports="3*p0+1*p23" TP_ports="3.00" TP_indexed="3.00" uops_indexed="4" ports_indexed="3*p0+1*p23" TP_ports_indexed="3.00"/>
        <IACA version="3.0" TP="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="8"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="3.00" fusion_occurred="1" uops="4" ports="3*p0+1*p23" TP_ports="3.00" TP_indexed="3.00" uops_indexed="4" ports_indexed="3*p0+1*p23" TP_ports_indexed="3.00"/>
        <IACA version="3.0" TP="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00"/>
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="8"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="8" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="8"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="8"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="8"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="8"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
        <doc TP="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="8"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="1" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="8"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="4" uops_MITE="4" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="4" ports="3*p0+1*p23A" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="8"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="16" cycles_addr_index="16"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="7" complex_decoder="1" TP_unrolled="11.00" TP_loop="10.80" uops="7">
          <latency start_op="1" target_op="4" cycles="7"/>
          <latency start_op="1" target_op="5" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="19" cycles_addr_index="19"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="7" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4">
          <latency start_op="1" target_op="4" cycles="7"/>
          <latency start_op="1" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="14" cycles_addr_index="14"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP01+1*FP12+1*FP45" TP_ports="0.67">
          <latency start_op="1" target_op="4" cycles="6"/>
          <latency start_op="1" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="14" cycles_addr_is_upper_bound="1" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="14" cycles_addr_index="14"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="4" ports="1*FP01+1*FP12+1*FP45" TP_ports="0.67">
          <latency start_op="1" target_op="4" cycles="7"/>
          <latency start_op="1" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles_addr="15" cycles_addr_is_upper_bound="1" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1"/>
          <latency start_op="2" target_op="5" cycles_addr="14" cycles_addr_index="14"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPISTRM" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPISTRM" iform="VPCMPISTRM_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPCMPISTRM (XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VPCMPISTRM_XMM_XMM_I8.html" summary="Packed Compare Implicit Length Strings, Return Mask" url-ref="felixcloutier.com/x86/PCMPISTRM.html">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG2" suppressed="1" type="reg" w="1" width="128" xtype="i32">XMM0</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="5" name="REG3" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="3.00" latency="11" TP_no_interiteration="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="2.2" TP="3.00" TP_no_interiteration="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="2.3" TP="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="10" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="10" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="3.00" latency="11" TP_no_interiteration="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="2.2" TP="3.00" TP_no_interiteration="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="2.3" TP="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="10" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="10" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="2.00" latency="11" TP_no_interiteration="2.00" uops="3" ports="2*p0+1*p1" TP_ports="2.00"/>
        <IACA version="2.2" TP="3.00" TP_no_interiteration="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="2.3" TP="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="3.0" TP="2.92" uops="3" ports="3*p0" TP_ports="3.00"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="10" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="10" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="3.00" TP_no_interiteration="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="2.3" TP="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="3.0" TP="2.91" uops="3" ports="3*p0" TP_ports="3.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="0" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="10"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="10"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="3.0" TP="2.92" uops="3" ports="3*p0" TP_ports="3.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="9"/>
          <latency start_op="1" target_op="5" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="8"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="3.00" uops="3" ports="3*p0" TP_ports="3.00"/>
        <IACA version="3.0" TP="2.92" uops="3" ports="3*p0" TP_ports="3.00"/>
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="9"/>
          <latency start_op="1" target_op="5" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="8"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="1" target_op="4"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="8" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="9"/>
          <latency start_op="1" target_op="5" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="8"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="8"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="8"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="9"/>
          <latency start_op="1" target_op="5" cycles="12" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="8"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="8"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="8"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
        <doc TP="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="8"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="8"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="8"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="8"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="3" uops_MITE="3" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="3.00" TP_loop="3.00" uops="3" ports="3*p0" TP_ports="3.00">
          <latency start_op="1" target_op="4" cycles="8"/>
          <latency start_op="1" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="8"/>
          <latency start_op="2" target_op="5" cycles="11" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="6" complex_decoder="1" TP_unrolled="9.00" TP_loop="8.83" uops="7">
          <latency start_op="1" target_op="4" cycles="7"/>
          <latency start_op="1" target_op="5" cycles="17" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="7"/>
          <latency start_op="2" target_op="5" cycles="18" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="3">
          <latency cycles="7" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="7" start_op="2" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
        <doc uops="ucode" latency="2" TP="2.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3">
          <latency start_op="1" target_op="4" cycles="7"/>
          <latency start_op="1" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="7"/>
          <latency start_op="2" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*FP01+1*FP12+1*FP45" TP_ports="0.67">
          <latency start_op="1" target_op="4" cycles="6"/>
          <latency start_op="1" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="6"/>
          <latency start_op="2" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="3" ports="1*FP01+1*FP12+1*FP45" TP_ports="0.67">
          <latency start_op="1" target_op="4" cycles="7"/>
          <latency start_op="1" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
          <latency start_op="2" target_op="4" cycles="7"/>
          <latency start_op="2" target_op="5" cycles="10" cycles_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERM2F128" category="AVX" cpl="3" extension="AVX" iclass="VPERM2F128" iform="VPERM2F128_YMMqq_YMMqq_MEMqq_IMMb" isa-set="AVX" string="VPERM2F128 (YMM, YMM, M256, I8)" vex="1" url="uops.info/html-instr/VPERM2F128_YMM_YMM_M256_I8.html" summary="Permute Floating-Point Values" url-ref="felixcloutier.com/x86/VPERM2F128.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="10" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc TP="1.0" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles_addr="16" cycles_addr_is_upper_bound="1" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="12">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERM2F128" category="AVX" cpl="3" extension="AVX" iclass="VPERM2F128" iform="VPERM2F128_YMMqq_YMMqq_YMMqq_IMMb" isa-set="AVX" string="VPERM2F128 (YMM, YMM, YMM, I8)" vex="1" url="uops.info/html-instr/VPERM2F128_YMM_YMM_YMM_I8.html" summary="Permute Floating-Point Values" url-ref="felixcloutier.com/x86/VPERM2F128.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="3" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="2.00" TP_loop="2.00" uops="2">
          <latency start_op="2" target_op="1" cycles="6"/>
          <latency start_op="3" target_op="1" cycles="6"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="8">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode" latency="3" TP="3.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP2" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPERMILPD (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPERMILPD_XMM_XMM_M128.html" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/VPERMILPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_ports="0.50" TP_unrolled="2.00" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPERMILPD (XMM, XMM, XMM)" vex="1" url="uops.info/html-instr/VPERMILPD_XMM_XMM_XMM.html" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/VPERMILPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_ports="0.50" TP_unrolled="2.00" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="3" TP="2.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
          <latency start_op="3" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VPERMILPD (YMM, YMM, M256)" vex="1" url="uops.info/html-instr/VPERMILPD_YMM_YMM_M256.html" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/VPERMILPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="u64"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles_addr="10" cycles_addr_is_upper_bound="1" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VPERMILPD (YMM, YMM, YMM)" vex="1" url="uops.info/html-instr/VPERMILPD_YMM_YMM_YMM.html" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/VPERMILPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
          <latency start_op="3" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP1/2" latency="4" TP="4.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="2.00" TP_loop="2.00" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
        <doc uops="2" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP01" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="5"/>
          <latency start_op="3" target_op="1" cycles="5"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="2"/>
          <latency start_op="3" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPERMILPD (XMM, M128, I8)" vex="1" url="uops.info/html-instr/VPERMILPD_XMM_M128_I8.html" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/VPERMILPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="7" cycles_addr_is_upper_bound="1" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.50" TP_loop="0.50" uops="1">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPERMILPD (XMM, XMM, I8)" vex="1" url="uops.info/html-instr/VPERMILPD_XMM_XMM_I8.html" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/VPERMILPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.33" TP_loop="0.33" uops="1">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.37" TP_loop="0.33" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_YMMqq_MEMqq_IMMb" isa-set="AVX" string="VPERMILPD (YMM, M256, I8)" vex="1" url="uops.info/html-instr/VPERMILPD_YMM_M256_I8.html" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/VPERMILPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="8" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" fusion_occurred="1" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="2" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="8" cycles_addr_is_upper_bound="1" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" fusion_occurred="1" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="3.0" TP="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
        <doc latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="3" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="2" uops_MITE="2" uops_MS="0" complex_decoder="1" available_simple_decoders="4" TP_unrolled="1.00" TP_loop="1.00" uops="2" ports="1*p23A+1*p5" TP_ports="1.00" uops_retire_slots_indexed="2" uops_MITE_indexed="2" uops_MS_indexed="0" complex_decoder_indexed="1" available_simple_decoders_indexed="4" TP_unrolled_indexed="1.00" TP_loop_indexed="1.00" uops_indexed="2" ports_indexed="1*p23A" TP_ports_indexed="0.33">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="2">
          <latency start_op="2" target_op="1" cycles_addr="11" cycles_addr_is_upper_bound="1" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles_addr="9" cycles_addr_is_upper_bound="1" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_YMMqq_YMMqq_IMMb" isa-set="AVX" string="VPERMILPD (YMM, YMM, I8)" vex="1" url="uops.info/html-instr/VPERMILPD_YMM_YMM_I8.html" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/VPERMILPD.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA version="2.1" TP="1.00" latency="1" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA version="2.2" TP="1.00" TP_no_interiteration="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA version="2.3" TP="1.00" uops="1" ports="1*p5" TP_ports="1.00"/>
        <IACA version="3.0" TP="0.98" uops="1" ports="1*p5" TP_ports="1.00"/>
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-P">
        <measurement uops_retire_slots="1" uops_MITE="1" uops_MS="0" TP_unrolled="1.00" TP_loop="1.00" uops="1" ports="1*p5" TP_ports="1.00">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
      <architecture name="ADL-E">
        <measurement uops_MS="0" TP_unrolled="0.67" TP_loop="0.67" uops="2">
          <latency start_op="2" target_op="1" cycles="2"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc uops="2" ports="FP1/2" latency="1" TP="1.00"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="2" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_unrolled="0.50" TP_loop="0.50" uops="1" ports="1*FP12" TP_ports="0.50">
          <latency start_op="2" target_op="1" cycles="3"/>
        </measurement>
        <doc uops="1" ports="FP1/2" latency="1" TP="0.50"/>
      </architecture>
      <architecture name="ZEN4">
        <measurement TP_unrolled="0.36" TP_loop="0.33" uops="1" ports="1*FP123" TP_ports="0.33">
          <latency start_op="2" target_op="1" cycles="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPS" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPS" iform="VPERMILPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPERMILPS (XMM, XMM, M128)" vex="1" url="uops.info/html-instr/VPERMILPS_XMM_XMM_M128.html" summary="Permute In-Lane of Quadruples of Single-Precision Floating-Point Values" url-ref="felixcloutier.com/x86/VPERMILPS.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u32"/>
      <architecture name="SNB">
        <IACA version="2.1" TP="1.00" fusion_occurred="1" latency="7" TP_no_interiteration="1.00" uops="2" ports="1*p23+1*p5" TP_ports="1.00" TP_indexed="1.00" uops_indexed="2" ports_indexed="1*p23+1*p5" TP_ports_indexed="1.00"/>
        <IACA version="2.2" TP="1.00" fusion_occurred="1" 